From 37e770f1cb79b56dace3687939ae11c5b1a4dc42 Mon Sep 17 00:00:00 2001
From: Deukhoofd
Date: Mon, 20 May 2019 17:45:03 +0200
Subject: [PATCH] Implements expression parsing

---
Review note: this is a revised copy of the patch. The blob ids on the
`index` lines are those of the original commit and do not match the revised
file contents. The body of the .idea/vcs.xml hunk was not recoverable and is
reproduced as found.

 .idea/vcs.xml                                 |   1 -
 CMakeLists.txt                                |   4 +-
 src/Parser/BinaryOperatorKind.hpp             |  14 +
 .../ParsedExpressions/ParsedExpression.hpp    | 157 ++++++++++
 .../ParsedStatements/ParsedStatement.hpp      | 106 +++++++
 src/Parser/Parser.cpp                         | 267 ++++++++++++++++++
 src/Parser/Parser.hpp                         |  42 +++
 src/Parser/Token.hpp                          |   4 +
 src/Parser/TokenKind.hpp                      |   2 +-
 src/Parser/UnaryOperatorKind.hpp              |  11 +
 10 files changed, 604 insertions(+), 4 deletions(-)
 create mode 100644 src/Parser/BinaryOperatorKind.hpp
 create mode 100644 src/Parser/ParsedExpressions/ParsedExpression.hpp
 create mode 100644 src/Parser/ParsedStatements/ParsedStatement.hpp
 create mode 100644 src/Parser/Parser.cpp
 create mode 100644 src/Parser/Parser.hpp
 create mode 100644 src/Parser/UnaryOperatorKind.hpp

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index b27de7c..94a25f7 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -2,6 +2,5 @@
-
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 814efea..2911d96 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,8 +5,8 @@ set(CMAKE_CXX_STANDARD 17)
 #add_subdirectory(extern)
 include_directories(extern)
 
-add_library(PorygonLang src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp)
-add_executable(PorygonLangTests src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/LexerTests.cpp)
+add_library(PorygonLang src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/Parser.cpp src/Parser/Parser.hpp src/Parser/ParsedStatements/ParsedStatement.hpp src/Parser/ParsedExpressions/ParsedExpression.hpp src/Parser/BinaryOperatorKind.hpp src/Parser/UnaryOperatorKind.hpp)
+add_executable(PorygonLangTests src/main.cpp src/main.h++ src/Parser/Lexer.cpp src/Parser/Lexer.hpp src/Parser/TokenKind.hpp src/Parser/Token.hpp src/Parser/LexerTests.cpp src/Parser/Parser.cpp src/Parser/Parser.hpp src/Parser/ParsedStatements/ParsedStatement.hpp src/Parser/ParsedExpressions/ParsedExpression.hpp src/Parser/BinaryOperatorKind.hpp src/Parser/UnaryOperatorKind.hpp)
 
 target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD)
 
diff --git a/src/Parser/BinaryOperatorKind.hpp b/src/Parser/BinaryOperatorKind.hpp
new file mode 100644
index 0000000..0f7effb
--- /dev/null
+++ b/src/Parser/BinaryOperatorKind.hpp
@@ -0,0 +1,14 @@
+
+#ifndef PORYGONLANG_BINARYOPERATORKIND_HPP
+#define PORYGONLANG_BINARYOPERATORKIND_HPP
+/// Infix operators the parser can store in a BinaryExpression.
+enum class BinaryOperatorKind{
+    Addition,
+    Subtraction,
+    Multiplication,
+    Division,
+    Equality,
+    LogicalAnd,
+    LogicalOr,
+};
+#endif //PORYGONLANG_BINARYOPERATORKIND_HPP
diff --git a/src/Parser/ParsedExpressions/ParsedExpression.hpp b/src/Parser/ParsedExpressions/ParsedExpression.hpp
new file mode 100644
index 0000000..9a1d71b
--- /dev/null
+++ b/src/Parser/ParsedExpressions/ParsedExpression.hpp
@@ -0,0 +1,157 @@
+
+#ifndef PORYGONLANG_PARSEDEXPRESSION_HPP
+#define PORYGONLANG_PARSEDEXPRESSION_HPP
+
+#include "../Token.hpp"
+#include "../UnaryOperatorKind.hpp"
+#include "../BinaryOperatorKind.hpp"
+
+/// Discriminator returned by ParsedExpression::GetKind().
+enum class ParsedExpressionKind{
+    LiteralInteger,
+    LiteralFloat,
+    LiteralString,
+    LiteralBool,
+
+    Unary,
+    Binary,
+};
+
+/// Base class of all expression nodes. Stores the node's source span as a
+/// start position and a length.
+class ParsedExpression {
+    unsigned int _position;
+    unsigned int _length;
+public:
+    virtual ParsedExpressionKind GetKind() = 0;
+
+    ParsedExpression(unsigned int position, unsigned int length){
+        _position = position;
+        _length = length;
+    }
+
+    // Nodes are passed around as ParsedExpression*, so deleting through the
+    // base pointer must be well-defined.
+    virtual ~ParsedExpression() = default;
+
+    unsigned int GetStartPosition(){
+        return _position;
+    }
+
+    /// Position of the last character covered: start + length - 1.
+    unsigned int GetEndPosition(){
+        return _position + _length - 1;
+    }
+
+    unsigned int GetLength(){
+        return _length;
+    }
+};
+
+/// Integer literal; the value is copied out of the IntegerToken.
+class LiteralIntegerExpression : public ParsedExpression{
+    long _value;
+public:
+    ParsedExpressionKind GetKind() final{
+        return ParsedExpressionKind::LiteralInteger;
+    }
+    explicit LiteralIntegerExpression(IntegerToken* token) : ParsedExpression(token -> GetStartPosition(), token -> GetLength()){
+        _value = token->Value;
+    }
+
+    long GetValue(){
+        return _value;
+    }
+};
+
+/// Float literal; the value is copied out of the FloatToken.
+class LiteralFloatExpression : public ParsedExpression{
+    double _value;
+public:
+    ParsedExpressionKind GetKind() final{
+        return ParsedExpressionKind::LiteralFloat;
+    }
+    explicit LiteralFloatExpression(FloatToken* token) : ParsedExpression(token -> GetStartPosition(), token -> GetLength()){
+        _value = token->Value;
+    }
+
+    double GetValue(){
+        return _value;
+    }
+};
+
+/// Boolean literal; true exactly when the token kind is TrueKeyword.
+class LiteralBoolExpression : public ParsedExpression{
+    bool _value;
+public:
+    ParsedExpressionKind GetKind() final{
+        return ParsedExpressionKind::LiteralBool;
+    }
+    explicit LiteralBoolExpression(IToken* token) : ParsedExpression(token -> GetStartPosition(), token -> GetLength()){
+        _value = token -> GetKind() == TokenKind::TrueKeyword;
+    }
+
+    bool GetValue(){
+        return _value;
+    }
+};
+
+/// Prefix operator applied to one operand. The operand pointer is stored
+/// as given and is not deleted by this class.
+class UnaryExpression : public ParsedExpression{
+    UnaryOperatorKind _kind;
+    ParsedExpression* _operand;
+public:
+    ParsedExpressionKind GetKind() final{
+        return ParsedExpressionKind::Unary;
+    }
+
+    UnaryExpression(UnaryOperatorKind kind, ParsedExpression* operand, unsigned int start, unsigned int length)
+            : ParsedExpression(start, length){
+        _kind = kind;
+        _operand = operand;
+    }
+
+    UnaryOperatorKind GetOperatorKind(){
+        return _kind;
+    }
+
+    ParsedExpression* GetOperand(){
+        return _operand;
+    }
+};
+
+/// Infix operator applied to two operands. The operand pointers are stored
+/// as given and are not deleted by this class.
+class BinaryExpression : public ParsedExpression{
+    BinaryOperatorKind _kind;
+    ParsedExpression* _left;
+    ParsedExpression* _right;
+public:
+    ParsedExpressionKind GetKind() final{
+        return ParsedExpressionKind::Binary;
+    }
+
+    BinaryExpression(BinaryOperatorKind kind, ParsedExpression* left, ParsedExpression* right, unsigned int start,
+                     unsigned int length)
+            : ParsedExpression(start, length){
+        _kind = kind;
+        _left = left;
+        _right = right;
+    }
+
+    BinaryOperatorKind GetOperatorKind(){
+        return _kind;
+    }
+
+    ParsedExpression* GetLeft() {
+        return _left;
+    }
+
+    ParsedExpression* GetRight() {
+        return _right;
+    }
+};
+
+
+#endif //PORYGONLANG_PARSEDEXPRESSION_HPP
diff --git a/src/Parser/ParsedStatements/ParsedStatement.hpp b/src/Parser/ParsedStatements/ParsedStatement.hpp
new file mode 100644
index 0000000..ca44e2d
--- /dev/null
+++ b/src/Parser/ParsedStatements/ParsedStatement.hpp
@@ -0,0 +1,106 @@
+#include <utility>
+
+
+#ifndef PORYGONLANG_PARSEDSTATEMENT_HPP
+#define PORYGONLANG_PARSEDSTATEMENT_HPP
+
+#include <vector>
+#include "../ParsedExpressions/ParsedExpression.hpp"
+
+/// Discriminator returned by ParsedStatement::GetKind().
+enum class ParsedStatementKind{
+    Script,
+    Block,
+    Expression,
+};
+
+/// Base class of all statement nodes. Stores the node's source span as a
+/// start position and a length.
+class ParsedStatement {
+    unsigned int _start;
+    unsigned int _length;
+public:
+    virtual ParsedStatementKind GetKind() = 0;
+    ParsedStatement(unsigned int start, unsigned int length){
+        _start = start;
+        _length = length;
+    }
+
+    // Nodes are passed around as ParsedStatement*, so deleting through the
+    // base pointer must be well-defined.
+    virtual ~ParsedStatement() = default;
+
+    unsigned int GetStartPosition(){
+        return _start;
+    }
+
+    /// Position of the last character covered: start + length - 1.
+    unsigned int GetEndPosition(){
+        return _start + _length - 1;
+    }
+};
+
+/// Ordered list of statements. The statement pointers are stored as given
+/// and are not deleted by this class.
+class ParsedBlockStatement : public ParsedStatement{
+    std::vector<ParsedStatement*> _statements;
+
+    // An empty block has no first statement to take a start position from;
+    // calling front() on it would be undefined behaviour, so report 0.
+    static unsigned int SpanStart(const std::vector<ParsedStatement*>& statements){
+        return statements.empty() ? 0 : statements.front()->GetStartPosition();
+    }
+
+    // Length from the first statement's start through the last statement's
+    // end. GetEndPosition() is inclusive, hence the + 1.
+    static unsigned int SpanLength(const std::vector<ParsedStatement*>& statements){
+        if (statements.empty()){
+            return 0;
+        }
+        return statements.back()->GetEndPosition() - statements.front()->GetStartPosition() + 1;
+    }
+public:
+    explicit ParsedBlockStatement(std::vector<ParsedStatement*> statements)
+            : ParsedStatement(SpanStart(statements), SpanLength(statements)){
+        _statements = std::move(statements);
+    }
+
+    ParsedStatementKind GetKind() override{
+        return ParsedStatementKind ::Block;
+    }
+
+    /// Returns a copy of the statement list.
+    std::vector<ParsedStatement*> GetStatements(){
+        return _statements;
+    }
+};
+
+/// Root node of a parsed script: a block that reports the Script kind.
+class ParsedScriptStatement : public ParsedBlockStatement{
+public:
+    // Move instead of copy: `statements` is already this constructor's own copy.
+    explicit ParsedScriptStatement(std::vector<ParsedStatement*> statements) : ParsedBlockStatement(std::move(statements)){}
+
+    ParsedStatementKind GetKind() final{
+        return ParsedStatementKind ::Script;
+    }
+};
+
+/// Statement consisting of a single expression; its span is the expression's.
+class ParsedExpressionStatement : public ParsedStatement{
+    ParsedExpression* _expression;
+public:
+    explicit ParsedExpressionStatement(ParsedExpression* expression) : ParsedStatement(expression->GetStartPosition(), expression->GetLength()){
+        _expression = expression;
+    }
+
+    ParsedStatementKind GetKind() final{
+        return ParsedStatementKind ::Expression;
+    }
+
+    ParsedExpression* GetExpression(){
+        return _expression;
+    }
+};
+
+#endif //PORYGONLANG_PARSEDSTATEMENT_HPP
diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp
new file mode 100644
index 0000000..db3887e
--- /dev/null
+++ b/src/Parser/Parser.cpp
@@ -0,0 +1,267 @@
+
+#include "Parser.hpp"
+#include "UnaryOperatorKind.hpp"
+#include "BinaryOperatorKind.hpp"
+
+#include <stdexcept>
+
+
+/// Parses the whole token stream into a script node. Each iteration takes
+/// one token, stops at EndOfFile, skips WhiteSpace, and otherwise parses a
+/// statement starting at that token. The caller receives the raw pointer.
+ParsedScriptStatement* Parser::Parse() {
+    std::vector<ParsedStatement*> statements;
+    while (true){
+        auto next = Parser::Next();
+        if (next->GetKind() == TokenKind::EndOfFile){
+            break;
+        }
+        if (next->GetKind() == TokenKind::WhiteSpace){
+            continue;
+        }
+        statements.push_back(Parser::ParseStatement(next));
+    }
+    return new ParsedScriptStatement(std::move(statements));
+}
+
+/// Every statement is currently an expression statement.
+ParsedStatement* Parser::ParseStatement(IToken* current){
+    return new ParsedExpressionStatement(Parser::ParseExpression(current));
+}
+
+/// Entry point for expressions: starts precedence climbing at the lowest level.
+ParsedExpression* Parser::ParseExpression(IToken* current){
+    return Parser::ParseBinaryExpression(current, OperatorPrecedence::No);
+}
+
+
+/// Precedence of `kind` as a prefix operator, or No if it is not one.
+OperatorPrecedence GetUnaryPrecedence(TokenKind kind){
+    switch (kind){
+        case TokenKind::PlusToken:
+        case TokenKind::MinusToken:
+        case TokenKind::NotKeyword:
+            return OperatorPrecedence::Unary;
+        default:
+            return OperatorPrecedence::No;
+    }
+}
+
+/// Maps a prefix operator token to its operator kind.
+/// Throws std::logic_error for any other token kind.
+UnaryOperatorKind GetUnaryOperatorKind(TokenKind kind){
+    switch (kind){
+        case TokenKind::PlusToken: return UnaryOperatorKind::Identity;
+        case TokenKind::MinusToken: return UnaryOperatorKind::Negation;
+        case TokenKind::NotKeyword: return UnaryOperatorKind::LogicalNegation;
+        default:
+            // A bare `throw;` with no exception in flight calls std::terminate.
+            throw std::logic_error("Token is not a unary operator");
+    }
+}
+
+/// Maps an infix operator token to its operator kind.
+/// Throws std::logic_error for any other token kind.
+BinaryOperatorKind GetBinaryOperatorKind(TokenKind kind){
+    switch (kind){
+        case TokenKind::PlusToken: return BinaryOperatorKind ::Addition;
+        case TokenKind::MinusToken: return BinaryOperatorKind ::Subtraction;
+        case TokenKind::StarToken: return BinaryOperatorKind ::Multiplication;
+        case TokenKind::SlashToken: return BinaryOperatorKind ::Division;
+        case TokenKind::EqualityToken: return BinaryOperatorKind ::Equality;
+        case TokenKind::AndKeyword: return BinaryOperatorKind ::LogicalAnd;
+        case TokenKind::OrKeyword: return BinaryOperatorKind ::LogicalOr;
+        default:
+            throw std::logic_error("Token is not a binary operator");
+    }
+}
+
+/// Precedence of `kind` as an infix operator, or No if it is not one.
+OperatorPrecedence GetBinaryPrecedence(TokenKind kind){
+    switch (kind){
+        case TokenKind::PlusToken: return OperatorPrecedence ::Additive;
+        case TokenKind::MinusToken: return OperatorPrecedence ::Additive;
+        case TokenKind::StarToken: return OperatorPrecedence ::Multiplication;
+        case TokenKind::SlashToken: return OperatorPrecedence ::Multiplication;
+        case TokenKind::EqualityToken: return OperatorPrecedence ::Equality;
+        case TokenKind::AndKeyword: return OperatorPrecedence ::LogicalAnd;
+        case TokenKind::OrKeyword: return OperatorPrecedence ::LogicalOr;
+        default:
+            return OperatorPrecedence::No;
+    }
+}
+
+/// Precedence-climbing parser. `current` is the first token of the
+/// expression (already consumed); `parentPrecedence` is the precedence of
+/// the operator this expression is an operand of. Operators that do not
+/// bind tighter than the parent are left for the caller, which makes
+/// operators of equal precedence left-associative.
+ParsedExpression* Parser::ParseBinaryExpression(IToken* current, OperatorPrecedence parentPrecedence){
+    OperatorPrecedence unaryPrecedence = GetUnaryPrecedence(current -> GetKind());
+    ParsedExpression* left;
+    if (unaryPrecedence != OperatorPrecedence::No && unaryPrecedence >= parentPrecedence){
+        UnaryOperatorKind operatorKind = GetUnaryOperatorKind(current -> GetKind());
+        auto next = Parser::Next();
+        auto operand = Parser::ParseBinaryExpression(next, unaryPrecedence);
+        auto startPos = current -> GetStartPosition();
+        // GetEndPosition() is inclusive, so the length is end - start + 1.
+        left = new UnaryExpression(operatorKind, operand, startPos, operand -> GetEndPosition() - startPos + 1);
+    } else{
+        left = Parser::ParsePrimaryExpression(current);
+    }
+    while (true){
+        auto next = Parser::Peek();
+        OperatorPrecedence binaryPrecedence = GetBinaryPrecedence(next -> GetKind());
+        if (binaryPrecedence == OperatorPrecedence::No || binaryPrecedence <= parentPrecedence){
+            break;
+        }
+        auto operatorKind = GetBinaryOperatorKind(next -> GetKind());
+        // Consume the operator that was only peeked at so far.
+        Parser::Next();
+        auto right = ParseBinaryExpression(Parser::Next(), binaryPrecedence);
+        auto startPos = left -> GetStartPosition();
+        left = new BinaryExpression(operatorKind, left, right, startPos, right -> GetEndPosition() - startPos + 1);
+    }
+    return left;
+}
+
+/// Parses a literal. Throws std::runtime_error when `current` cannot start
+/// a primary expression.
+ParsedExpression *Parser::ParsePrimaryExpression(IToken *current) {
+    switch (current -> GetKind()){
+        case TokenKind ::Integer: return new LiteralIntegerExpression(static_cast<IntegerToken*>(current));
+        case TokenKind ::Float: return new LiteralFloatExpression(static_cast<FloatToken*>(current));
+        case TokenKind ::TrueKeyword: return new LiteralBoolExpression(current);
+        case TokenKind ::FalseKeyword: return new LiteralBoolExpression(current);
+        default:
+            throw std::runtime_error("Unexpected token, expected a literal");
+    }
+}
+
+
+/// Returns the token at the current position without consuming it.
+/// at() throws std::out_of_range instead of reading past the end.
+IToken *Parser::Peek() {
+    return Parser::_tokens.at(_position);
+}
+
+/// Returns the token at the current position and advances past it.
+/// at() throws std::out_of_range instead of reading past the end.
+IToken *Parser::Next() {
+    auto token = Parser::_tokens.at(_position);
+    Parser::_position++;
+    return token;
+}
+
+#ifdef TESTS_BUILD
+#include <catch.hpp>
+
+TEST_CASE( "Parse empty script", "[parser]" ) {
+    std::vector<IToken*> v {new SimpleToken(TokenKind::EndOfFile,0,0)};
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.empty());
+}
+
+TEST_CASE( "Parse single true keyword", "[parser]" ) {
+    std::vector<IToken*> v {new SimpleToken(TokenKind::TrueKeyword,0,0), new SimpleToken(TokenKind::EndOfFile,0,0)};
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.size() == 1);
+    auto firstStatement = parsedStatements[0];
+    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
+    auto expression = ((ParsedExpressionStatement*)firstStatement)->GetExpression();
+    REQUIRE(expression -> GetKind() == ParsedExpressionKind::LiteralBool);
+    auto boolean = ((LiteralBoolExpression*)expression);
+    REQUIRE(boolean->GetValue() == true);
+}
+
+TEST_CASE( "Parse single false keyword", "[parser]" ) {
+    std::vector<IToken*> v {new SimpleToken(TokenKind::FalseKeyword,0,0), new SimpleToken(TokenKind::EndOfFile,0,0)};
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.size() == 1);
+    auto firstStatement = parsedStatements[0];
+    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
+    auto expression = ((ParsedExpressionStatement*)firstStatement)->GetExpression();
+    REQUIRE(expression -> GetKind() == ParsedExpressionKind::LiteralBool);
+    auto boolean = ((LiteralBoolExpression*)expression);
+    REQUIRE(boolean->GetValue() == false);
+}
+
+TEST_CASE( "Parse simple addition", "[parser]" ) {
+    std::vector<IToken*> v {
+        new IntegerToken(5, 0, 0),
+        new SimpleToken(TokenKind::PlusToken,0,0),
+        new IntegerToken(10, 0, 0),
+        new SimpleToken(TokenKind::EndOfFile,0,0)
+    };
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.size() == 1);
+    auto firstStatement = parsedStatements[0];
+    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
+    auto expression = ((ParsedExpressionStatement*)firstStatement)->GetExpression();
+    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Binary);
+    auto binary = ((BinaryExpression*)expression);
+    CHECK(binary -> GetOperatorKind() == BinaryOperatorKind::Addition);
+    auto left = binary->GetLeft();
+    auto right = binary->GetRight();
+    REQUIRE(left->GetKind() == ParsedExpressionKind::LiteralInteger);
+    REQUIRE(right->GetKind() == ParsedExpressionKind::LiteralInteger);
+    CHECK(((LiteralIntegerExpression*)left)->GetValue() == 5);
+    CHECK(((LiteralIntegerExpression*)right)->GetValue() == 10);
+}
+
+TEST_CASE( "Parse simple negation", "[parser]" ) {
+    std::vector<IToken*> v {
+        new SimpleToken(TokenKind::MinusToken,0,0),
+        new IntegerToken(10, 0, 0),
+        new SimpleToken(TokenKind::EndOfFile,0,0)
+    };
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.size() == 1);
+    auto firstStatement = parsedStatements[0];
+    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
+    auto expression = ((ParsedExpressionStatement*)firstStatement)->GetExpression();
+    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Unary);
+    auto unary = ((UnaryExpression*)expression);
+    CHECK(unary -> GetOperatorKind() == UnaryOperatorKind::Negation);
+    auto operand = unary->GetOperand();
+    REQUIRE(operand->GetKind() == ParsedExpressionKind::LiteralInteger);
+    CHECK(((LiteralIntegerExpression*)operand)->GetValue() == 10);
+}
+
+
+TEST_CASE( "Assert binary precedence", "[parser]" ) {
+    std::vector<IToken*> v {
+        new IntegerToken(5, 0, 0),
+        new SimpleToken(TokenKind::PlusToken,0,0),
+        new IntegerToken(10, 0, 0),
+        new SimpleToken(TokenKind::StarToken,0,0),
+        new IntegerToken(6, 0, 0),
+        new SimpleToken(TokenKind::EndOfFile,0,0)
+    };
+    Parser parser = Parser(v);
+    auto parsedStatements = parser.Parse() -> GetStatements();
+    REQUIRE(parsedStatements.size() == 1);
+    auto firstStatement = parsedStatements[0];
+    REQUIRE(firstStatement -> GetKind() == ParsedStatementKind::Expression);
+    auto expression = ((ParsedExpressionStatement*)firstStatement)->GetExpression();
+    REQUIRE(expression -> GetKind() == ParsedExpressionKind::Binary);
+    auto binary = ((BinaryExpression*)expression);
+    CHECK(binary -> GetOperatorKind() == BinaryOperatorKind::Addition);
+    auto left = binary->GetLeft();
+    auto right = binary->GetRight();
+    REQUIRE(left->GetKind() == ParsedExpressionKind::LiteralInteger);
+    REQUIRE(right->GetKind() == ParsedExpressionKind::Binary);
+    CHECK(((LiteralIntegerExpression*)left)->GetValue() == 5);
+    left = ((BinaryExpression*)right)->GetLeft();
+    right = ((BinaryExpression*)right)->GetRight();
+    CHECK(((LiteralIntegerExpression*)left)->GetValue() == 10);
+    CHECK(((LiteralIntegerExpression*)right)->GetValue() == 6);
+}
+
+
+#endif
\ No newline at end of file
diff --git a/src/Parser/Parser.hpp b/src/Parser/Parser.hpp
new file mode 100644
index 0000000..5d8290d
--- /dev/null
+++ b/src/Parser/Parser.hpp
@@ -0,0 +1,42 @@
+#include <utility>
+
+#ifndef PORYGONLANG_PARSER_HPP
+#define PORYGONLANG_PARSER_HPP
+
+
+#include "ParsedStatements/ParsedStatement.hpp"
+
+/// Binding strength of operators, weakest first. The parser compares these
+/// with relational operators, so the declaration order is significant.
+enum class OperatorPrecedence {
+    No,
+    LogicalOr,
+    LogicalAnd,
+    Equality,
+    Additive,
+    Multiplication,
+    Unary,
+};
+
+/// Turns a token list into a ParsedScriptStatement. The token pointers are
+/// stored as given and are not deleted by this class.
+class Parser {
+    std::vector<IToken*> _tokens;
+    unsigned int _position;
+
+    IToken* Peek();
+    IToken* Next();
+    ParsedStatement* ParseStatement(IToken* current);
+    ParsedExpression* ParseExpression(IToken* current);
+    ParsedExpression* ParseBinaryExpression(IToken* current, OperatorPrecedence parentPrecedence);
+    ParsedExpression* ParsePrimaryExpression(IToken* current);
+public:
+    ParsedScriptStatement* Parse();
+    explicit Parser(std::vector<IToken*> tokens){
+        _tokens = std::move(tokens);
+        _position = 0;
+    }
+};
+
+
+#endif //PORYGONLANG_PARSER_HPP
diff --git a/src/Parser/Token.hpp b/src/Parser/Token.hpp
index 96a0c28..bc7da2d 100644
--- a/src/Parser/Token.hpp
+++ b/src/Parser/Token.hpp
@@ -24,6 +24,10 @@ public:
     unsigned int GetEndPosition(){
         return Position + Length - 1;
     }
+
+    unsigned int GetLength(){
+        return Length;
+    }
 };
 
 class SimpleToken : public IToken{
diff --git a/src/Parser/TokenKind.hpp b/src/Parser/TokenKind.hpp
index 8f5d301..fb7f21a 100644
--- a/src/Parser/TokenKind.hpp
+++ b/src/Parser/TokenKind.hpp
@@ -1,7 +1,7 @@
 
 #ifndef PORYGONLANG_TOKENKIND_HPP
 #define PORYGONLANG_TOKENKIND_HPP
-enum TokenKind{
+enum class TokenKind{
     EndOfFile,
     WhiteSpace,
 
diff --git a/src/Parser/UnaryOperatorKind.hpp b/src/Parser/UnaryOperatorKind.hpp
new file mode 100644
index 0000000..4ff8732
--- /dev/null
+++ b/src/Parser/UnaryOperatorKind.hpp
@@ -0,0 +1,11 @@
+
+#ifndef PORYGONLANG_UNARYOPERATORKIND_HPP
+#define PORYGONLANG_UNARYOPERATORKIND_HPP
+
+/// Prefix operators the parser can store in a UnaryExpression.
+enum class UnaryOperatorKind{
+    Identity,
+    Negation,
+    LogicalNegation,
+};
+#endif //PORYGONLANG_UNARYOPERATORKIND_HPP