diff --git a/src/Diagnostics/DiagnosticType.hpp b/src/Diagnostics/DiagnosticType.hpp index 7ad1927..24a63f4 100644 --- a/src/Diagnostics/DiagnosticType.hpp +++ b/src/Diagnostics/DiagnosticType.hpp @@ -3,7 +3,12 @@ #include namespace MalachScript::Diagnostics { - enum class DiagnosticType : uint8_t { UnknownToken, InvalidNumericalBase, ExpectedEndOfString }; + enum class DiagnosticType : uint8_t { + UnknownToken, + InvalidNumericalBase, + ExpectedEndOfString, + UnexpectedToken, + }; class DiagnosticTypeHelper { static std::string ToEnglishString(DiagnosticType type) { @@ -11,6 +16,7 @@ namespace MalachScript::Diagnostics { case DiagnosticType::UnknownToken: return "Unknown token"; case DiagnosticType::InvalidNumericalBase: return "Invalid numerical base"; case DiagnosticType::ExpectedEndOfString: return "Expected end of string"; + case DiagnosticType::UnexpectedToken: return "Unexpected Token"; } return std::to_string((uint8_t)type); } diff --git a/src/Parser/Expressions/ParsedExperssion.hpp b/src/Parser/Expressions/ParsedExperssion.hpp deleted file mode 100644 index 22c3d3d..0000000 --- a/src/Parser/Expressions/ParsedExperssion.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef MALACHSCRIPT_PARSEDEXPERSSION_HPP -#define MALACHSCRIPT_PARSEDEXPERSSION_HPP - -namespace MalachScript::Parser{ - class ParsedExpression{}; -} - -#endif // MALACHSCRIPT_PARSEDEXPERSSION_HPP diff --git a/src/Parser/Lexer/LexToken.hpp b/src/Parser/Lexer/LexToken.hpp index dfea5d4..6676854 100644 --- a/src/Parser/Lexer/LexToken.hpp +++ b/src/Parser/Lexer/LexToken.hpp @@ -2,13 +2,11 @@ #define MALACHSCRIPT_LEXTOKEN_HPP #include -#include #include "LexTokenKind.hpp" +#include "../../TextSpan.hpp" namespace MalachScript::Parser { class LexToken { - friend class Lexer; - std::unique_ptr _next; TextSpan _span; @@ -18,12 +16,16 @@ namespace MalachScript::Parser { [[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0; [[nodiscard]] const std::unique_ptr& GetNext() const noexcept { return _next; } [[nodiscard]] const TextSpan& GetSpan() const noexcept { return _span; } + + void SetNext(LexToken* token){ + _next = std::unique_ptr(token); + } }; template class LexTokenImpl : public LexToken { public: LexTokenImpl(TextSpan span) : LexToken(span){}; - [[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; } + [[nodiscard]] LexTokenKind GetKind() const noexcept final { return kind; } }; class IntegerLiteral : public LexTokenImpl { @@ -53,12 +55,12 @@ namespace MalachScript::Parser { }; class IdentifierToken : public LexTokenImpl { - std::u8string _value; + std::u8string_view _value; public: - IdentifierToken(TextSpan span, std::u8string value) - : LexTokenImpl(span), _value(std::move(value)) {} - [[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; } + IdentifierToken(TextSpan span, std::u8string_view value) + : LexTokenImpl(span), _value(value) {} + [[nodiscard]] const std::u8string_view& GetValue() const noexcept { return _value; } }; } diff --git a/src/Parser/Lexer/Lexer.cpp b/src/Parser/Lexer/Lexer.cpp index dc17214..b73d230 100644 --- a/src/Parser/Lexer/Lexer.cpp +++ b/src/Parser/Lexer/Lexer.cpp @@ -12,7 +12,7 @@ namespace MalachScript::Parser { auto* last = first; while (true) { auto* next = LexNext(); - last->_next = std::unique_ptr(next); + last->SetNext(next); last = next; if (next->GetKind() == LexTokenKind::EndOfFile) { break; @@ -459,8 +459,21 @@ namespace MalachScript::Parser { } static uint32_t constexpr Hash(const char8_t* input) { - return *input != 0U ? static_cast(*input) + 33 * Hash(input + 1) : 5381; + if (*input != 0U) { + return static_cast(*input) + 33 * Hash(input + 1); + } else { + return 5381; + } }; + static uint32_t HashStringView(const std::u8string_view& sv){ + auto init = 5381; + for (auto it = sv.rbegin(); it != sv.rend(); ++it) + { + init *= 33; + init += static_cast(*it); + } + return init; + } LexToken* Lexer::LexKeywordOrIdentifier() { auto start = _position; @@ -468,9 +481,9 @@ namespace MalachScript::Parser { while (IsAlphaNumericalOrUnderscore(Peek(offset))) { offset++; } - auto str = std::u8string(_script.substr(start, offset)); + auto str = _script.substr(start, offset); Progress(offset - 1); - switch (Hash(str.c_str())) { + switch (HashStringView(str)) { case Hash(u8"and"): return Create>(TextSpan(start, _position)); case Hash(u8"abstract"): return Create>(TextSpan(start, _position)); diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index c8c87ed..9c566bd 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -1,15 +1,28 @@ #include "Parser.hpp" +#include + +#define PROGRESS_TOKEN(token) \ + token = token->GetNext().get(); \ + while (token->GetKind() == LexTokenKind::Whitespace) { \ + token = token->GetNext().get(); \ + } namespace MalachScript::Parser { ParsedScriptStatement* Parser::Parse() { - std::vector statements(32); + std::vector statements; + statements.reserve(32); size_t current = 0; while (true) { - auto next = this->Consume(); - if (next->GetKind() == LexTokenKind::EndOfFile) { + while (_currentToken->GetKind() == LexTokenKind::Whitespace) { + _currentToken = _currentToken->GetNext().get(); + } + if (_currentToken->GetKind() == LexTokenKind::EndOfFile) { break; } - statements[current] = this->ParseStatement(next); + const ParsedStatement* statement; + if (ParseClass(statement)) { + } + statements.push_back(statement); current++; } statements.resize(current); @@ -17,10 +30,84 @@ namespace MalachScript::Parser { if (current > 0) { end = statements.back()->GetSpan().GetEnd(); } - const auto* block = new ParsedBlockStatement(TextSpan(0, end), statements); - return new ParsedScriptStatement(block); + return new ParsedScriptStatement(TextSpan(0, end), statements); } - const ParsedStatement* Parser::ParseStatement(const LexToken* token) { - // If modifier (shared, external, private, protected, etc) push to buffer, continue + bool Parser::ParseClass(const ParsedStatement*& out) { + const auto* current = _currentToken; + auto start = current->GetSpan().GetStart(); + bool lookingForClass = true; + bool encounteredError = false; + while (lookingForClass) { + switch (current->GetKind()) { + case LexTokenKind::SharedKeyword: break; + case LexTokenKind::AbstractKeyword: break; + case LexTokenKind::FinalKeyword: break; + case LexTokenKind::ExternalKeyword: break; + case LexTokenKind::ClassKeyword: lookingForClass = false; break; + default: return false; + } + PROGRESS_TOKEN(current); + } + std::u8string_view identifier; + // After class keyword, an identifier should always follow, if it doesn't, log an error. + identifier = ParseIdentifier(current, encounteredError); + PROGRESS_TOKEN(current); + std::vector inherits; + std::vector body; + body.reserve(16); + + switch (current->GetKind()) { + case LexTokenKind::SemicolonSymbol: { + PROGRESS_TOKEN(current); + break; + } + case LexTokenKind::ColonSymbol: { + PROGRESS_TOKEN(current); + auto id = ParseIdentifier(current, encounteredError); + inherits.push_back(id); + while (current->GetKind() == LexTokenKind::CommaSymbol) { + PROGRESS_TOKEN(current); + id = ParseIdentifier(current, encounteredError); + inherits.push_back(id); + PROGRESS_TOKEN(current); + } + if (!encounteredError && current->GetKind() != LexTokenKind::OpenCurlyParenthesisSymbol) + { + encounteredError = true; + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } + // Intentionally don't break so we continue into the inner body statement. + } + case LexTokenKind::OpenCurlyParenthesisSymbol: { + PROGRESS_TOKEN(current); + while (true) { + // Cheapest operation, check first + if (current->GetKind() == LexTokenKind::CloseCurlyParenthesisSymbol) { + PROGRESS_TOKEN(current); + break; + } + const ParsedStatement* statement; + // TODO: Sort by + if (!ParseVirtProp(statement) && + !ParseFunc(statement) && + !ParseVar(statement) && + !ParseFuncDef(statement)){ + LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan()); + } else{ + body.push_back(statement); + } + } + break; + } + default: throw; + } + out = new ParsedClassStatement(TextSpan(start, current->GetSpan().GetEnd()), identifier, + inherits, body); + _currentToken = current; + return true; } + bool Parser::ParseVirtProp([[maybe_unused]]const ParsedStatement*& out) { return false; } + bool Parser::ParseFunc([[maybe_unused]]const ParsedStatement*& out) { return false; } + bool Parser::ParseVar([[maybe_unused]]const ParsedStatement*& out) { return false; } + bool Parser::ParseFuncDef([[maybe_unused]]const ParsedStatement*& out) { return false; } } diff --git a/src/Parser/Parser.hpp b/src/Parser/Parser.hpp index d3e2e4a..455d3c5 100644 --- a/src/Parser/Parser.hpp +++ b/src/Parser/Parser.hpp @@ -1,32 +1,43 @@ #ifndef MALACHSCRIPT_PARSER_HPP #define MALACHSCRIPT_PARSER_HPP +#include "../Diagnostics/Diagnostics.hpp" #include "Lexer/LexToken.hpp" #include "Statements/ParsedStatement.hpp" namespace MalachScript::Parser { class Parser { public: - Parser(const LexToken* firstToken) : _currentToken(firstToken) {} + Parser(const char* scriptName, const LexToken* firstToken, Diagnostics::Diagnostics* diagnostics) + : _scriptName(reinterpret_cast(scriptName)), _diagnostics(diagnostics), + _currentToken(firstToken) {} + + Parser(std::u8string_view scriptName, const LexToken* firstToken, Diagnostics::Diagnostics* diagnostics) + : _scriptName(scriptName), _diagnostics(diagnostics), _currentToken(firstToken) {} ParsedScriptStatement* Parse(); private: + std::u8string_view _scriptName; + Diagnostics::Diagnostics* _diagnostics; const LexToken* _currentToken; - inline const LexToken* Peek() { - if (_currentToken->GetKind() == LexTokenKind::EndOfFile) { - return _currentToken; - } - return _currentToken->GetNext().get(); + + inline void LogError(Diagnostics::DiagnosticType type, const TextSpan& span) { + _diagnostics->LogError(type, _scriptName, span); } - inline const LexToken* Consume() { - if (_currentToken->GetKind() == LexTokenKind::EndOfFile) { - return _currentToken; - } - _currentToken = _currentToken->GetNext().get(); - return _currentToken; - } + bool ParseClass(const ParsedStatement*& out); + bool ParseVirtProp(const ParsedStatement*& out); + bool ParseFunc(const ParsedStatement*& out); + bool ParseVar(const ParsedStatement*& out); + bool ParseFuncDef(const ParsedStatement*& out); - const ParsedStatement* ParseStatement(const LexToken* token); + std::u8string_view ParseIdentifier(const LexToken* token, bool& logError) { + if (logError && token->GetKind() != LexTokenKind::Identifier) { + LogError(Diagnostics::DiagnosticType::UnexpectedToken, token->GetSpan()); + logError = false; + return std::u8string_view(); + } + return reinterpret_cast(token)->GetValue(); + } }; } diff --git a/src/Parser/Statements/ParsedStatement.hpp b/src/Parser/Statements/ParsedStatement.hpp index e62bc7e..b795811 100644 --- a/src/Parser/Statements/ParsedStatement.hpp +++ b/src/Parser/Statements/ParsedStatement.hpp @@ -10,6 +10,7 @@ namespace MalachScript::Parser { public: ParsedStatement(TextSpan span) : _span(span) {} + virtual ~ParsedStatement() = default; [[nodiscard]] virtual ParsedStatementKind GetKind() const noexcept = 0; [[nodiscard]] inline const TextSpan& GetSpan() const noexcept { return _span; } }; @@ -17,15 +18,15 @@ namespace MalachScript::Parser { template class ParsedStatementImpl : public ParsedStatement { public: ParsedStatementImpl(TextSpan span) : ParsedStatement(span) {} - [[nodiscard]] inline ParsedStatementKind GetKind() const noexcept override { return kind; } + [[nodiscard]] inline ParsedStatementKind GetKind() const noexcept final { return kind; } }; - class ParsedBlockStatement : public ParsedStatementImpl { + class ParsedScriptStatement : public ParsedStatementImpl { std::vector> _statements; public: - ParsedBlockStatement(TextSpan span, const std::vector& statements) - : ParsedStatementImpl(span), _statements(statements.size()) { + ParsedScriptStatement(TextSpan span, const std::vector& statements) + : ParsedStatementImpl(span), _statements(statements.size()) { for (size_t i = 0; i < statements.size(); i++) _statements[i] = std::unique_ptr(statements[i]); } @@ -35,15 +36,18 @@ namespace MalachScript::Parser { } }; - class ParsedScriptStatement : public ParsedStatementImpl { - std::unique_ptr _block; + class ParsedClassStatement : public ParsedStatementImpl { + std::u8string_view _identifier; + std::vector _inherits; + std::vector> _body; public: - ParsedScriptStatement(const ParsedBlockStatement* block) - : ParsedStatementImpl(block->GetSpan()), _block(block) {} - - [[nodiscard]] inline const std::unique_ptr& GetBlock() const noexcept { - return _block; + ParsedClassStatement(TextSpan span, std::u8string_view identifier, std::vector inherits, + const std::vector& body) + : ParsedStatementImpl(span), _identifier(identifier), _inherits(inherits), + _body(body.size()) { + for (size_t i = 0; i < body.size(); i++) + _body[i] = std::unique_ptr(body[i]); } }; } diff --git a/src/Parser/Statements/ParsedStatementKind.hpp b/src/Parser/Statements/ParsedStatementKind.hpp index 7a8bb22..369abe6 100644 --- a/src/Parser/Statements/ParsedStatementKind.hpp +++ b/src/Parser/Statements/ParsedStatementKind.hpp @@ -2,10 +2,12 @@ #define MALACHSCRIPT_PARSEDSTATEMENTKIND_HPP namespace MalachScript::Parser { - enum class ParsedStatementKind{ + enum class ParsedStatementKind : uint8_t { Unknown, Block, Script, + ExpressionStatement, + Class, }; } diff --git a/tests/ParserTests/ClassTests.cpp b/tests/ParserTests/ClassTests.cpp new file mode 100644 index 0000000..7369253 --- /dev/null +++ b/tests/ParserTests/ClassTests.cpp @@ -0,0 +1,77 @@ +#include "../../extern/doctest.hpp" +#include "../../src/Parser/Parser.hpp" + +using namespace MalachScript; + +TEST_CASE("Parse basic class without body") { + std::vector vec = { + new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::IdentifierToken(TextSpan(0, 0), u8"foobar"), + new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::LexTokenImpl(TextSpan(0, 0)), + }; + for (size_t i = 0; i < vec.size() - 1; i++) { + vec[i]->SetNext(vec[i + 1]); + } + Diagnostics::Diagnostics diags; + auto parser = Parser::Parser(u8"class without body", vec.front(), &diags); + auto* script = parser.Parse(); + REQUIRE(diags.GetMessages().empty()); + { + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); + } + delete vec[0]; + delete script; +} + +#define PARSER_TEST(name, tokens, asserts) \ + TEST_CASE(name) { \ + std::vector vec = { \ + tokens, \ + new Parser::LexTokenImpl(TextSpan(0, 0)), \ + }; \ + for (size_t i = 0; i < vec.size() - 1; i++) { \ + vec[i]->SetNext(vec[i + 1]); \ + } \ + Diagnostics::Diagnostics diags; \ + auto parser = Parser::Parser(u8"scriptname", vec.front(), &diags); \ + auto* script = parser.Parse(); \ + REQUIRE(diags.GetMessages().empty()); \ + asserts; \ + delete vec[0]; \ + delete script; \ + } + +#define PARSER_TEST_TOKENS(...) __VA_ARGS__ + +PARSER_TEST("Parse basic class without body", + PARSER_TEST_TOKENS(new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::IdentifierToken(TextSpan(0, 0), u8"foobar"), + new Parser::LexTokenImpl(TextSpan(0, 0))), + { + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); + }) + +PARSER_TEST("Parse basic class without body with whitespaces", + PARSER_TEST_TOKENS(new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::IdentifierToken(TextSpan(0, 0), u8"foobar"), + new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::LexTokenImpl(TextSpan(0, 0))), + { + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); + }) + +PARSER_TEST( + "Parse basic class with empty body", + PARSER_TEST_TOKENS(new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::IdentifierToken(TextSpan(0, 0), u8"foobar"), + new Parser::LexTokenImpl(TextSpan(0, 0)), + new Parser::LexTokenImpl(TextSpan(0, 0))), + { + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); + }) diff --git a/tests/ParserTests/ParserIntegrationTests.cpp b/tests/ParserTests/ParserIntegrationTests.cpp new file mode 100644 index 0000000..22b6f12 --- /dev/null +++ b/tests/ParserTests/ParserIntegrationTests.cpp @@ -0,0 +1,27 @@ +#include "../../extern/doctest.hpp" +#include "../../src/Parser/Lexer/Lexer.hpp" +#include "../../src/Parser/Parser.hpp" + +using namespace MalachScript; + +#define PARSE_TEST(name, scriptText, asserts) \ + TEST_CASE(name) { \ + Diagnostics::Diagnostics diags; \ + auto lexer = Parser::Lexer(name, scriptText, &diags); \ + auto token = lexer.Lex(); \ + auto parser = Parser::Parser(name, token, &diags); \ + auto script = parser.Parse(); \ + asserts; \ + } + +PARSE_TEST("Parse class without definition", "class foobar;", { + REQUIRE(diags.GetMessages().empty()); + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); +}) + +PARSE_TEST("Parse class with empty definition", "class foobar {}", { + REQUIRE(diags.GetMessages().empty()); + REQUIRE(script->GetStatements().size() == 1); + REQUIRE(script->GetStatements()[0].get()->GetKind() == Parser::ParsedStatementKind::Class); +}) \ No newline at end of file