MalachScript/src/Parser/Parser.cpp

426 lines
19 KiB
C++
Raw Normal View History

2020-10-05 15:45:00 +00:00
#include "Parser.hpp"
#include <iostream>
#include <stdexcept>
#include "../CoreData/FuncAttr.hpp"
#include "../CoreData/PrimitiveTypes.hpp"
2020-10-07 20:11:18 +00:00
// Moves `token` to the next token of the chain and then keeps moving while the token is
// whitespace. `token` must be an assignable pointer expression.
// No null check is performed on the result of GetNext().
// The body is wrapped in do { } while (0) so the macro expands to exactly one statement and
// stays correct when used as the body of an unbraced `if` / `else`.
#define PROGRESS_TOKEN(token)                                                                    \
    do {                                                                                         \
        (token) = (token)->GetNext().get();                                                      \
        while ((token)->GetKind() == LexTokenKind::Whitespace) {                                 \
            (token) = (token)->GetNext().get();                                                  \
        }                                                                                        \
    } while (0)
// Logs an UnexpectedToken diagnostic at the span of `token` when its kind is not
// LexTokenKind::<kind>. The token is not advanced.
// Wrapped in do { } while (0) so it behaves as a single statement; `token` is parenthesized so
// any pointer expression can be passed.
#define EXPECT_TOKEN(token, kind)                                                                \
    do {                                                                                         \
        if ((token)->GetKind() != LexTokenKind::kind) {                                          \
            LogError(Diagnostics::DiagnosticType::UnexpectedToken, (token)->GetSpan());          \
        }                                                                                        \
    } while (0)
2020-10-05 15:45:00 +00:00
namespace MalachScript::Parser {
2020-10-09 09:54:43 +00:00
// Public entry point: parses from the current token as a script and hands back the resulting
// script statement.
const ParsedScriptStatement* Parser::Parse() {
    const ParsedScriptStatement* script = ParseScript();
    return script;
}
// Parses top-level statements (class, function, namespace) starting at _currentToken until the
// EndOfFile token is reached.
//
// Tokens that start none of the known statements are skipped one at a time.
// Returns a newly allocated ParsedScriptStatement whose span starts at 0 and ends at the end of
// the last parsed statement (0 when nothing was parsed).
const ParsedScriptStatement* Parser::ParseScript() {
    std::vector<const ParsedStatement*> statements;
    statements.reserve(32);
    while (true) {
        // Leading whitespace is not part of any statement.
        while (_currentToken->GetKind() == LexTokenKind::Whitespace) {
            _currentToken = _currentToken->GetNext().get();
        }
        if (_currentToken->GetKind() == LexTokenKind::EndOfFile) {
            break;
        }
        const ParsedStatement* statement = nullptr;
        const bool parsed = ParseClass(statement) || ParseFunc(statement) || ParseNamespace(statement);
        if (!parsed) {
            // TODO: Log error
            PROGRESS_TOKEN(_currentToken);
            continue;
        }
        statements.push_back(statement);
    }
    // The end offset is forwarded directly so it is never squeezed through an `int` temporary.
    if (statements.empty()) {
        return new ParsedScriptStatement(TextSpan(0, 0), statements);
    }
    return new ParsedScriptStatement(TextSpan(0, statements.back()->GetSpan().GetEnd()), statements);
}
2020-10-07 20:11:18 +00:00
bool Parser::ParseClass(const ParsedStatement*& out) {
const auto* current = _currentToken;
auto start = current->GetSpan().GetStart();
bool lookingForClass = true;
bool encounteredError = false;
while (lookingForClass) {
switch (current->GetKind()) {
case LexTokenKind::SharedKeyword: break;
case LexTokenKind::AbstractKeyword: break;
case LexTokenKind::FinalKeyword: break;
case LexTokenKind::ExternalKeyword: break;
case LexTokenKind::ClassKeyword: lookingForClass = false; break;
default: return false;
}
PROGRESS_TOKEN(current);
}
// After class keyword, an identifier should always follow, if it doesn't, log an error.
Identifier identifier;
if (!ParseIdentifier(identifier, current)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
2020-10-07 20:11:18 +00:00
PROGRESS_TOKEN(current);
std::vector<Identifier> inherits;
2020-10-07 20:11:18 +00:00
std::vector<const ParsedStatement*> body;
body.reserve(16);
switch (current->GetKind()) {
case LexTokenKind::SemicolonSymbol: {
PROGRESS_TOKEN(current);
break;
}
case LexTokenKind::ColonSymbol: {
PROGRESS_TOKEN(current);
Identifier id;
if (!ParseIdentifier(id, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
2020-10-07 20:11:18 +00:00
inherits.push_back(id);
while (current->GetKind() == LexTokenKind::CommaSymbol) {
PROGRESS_TOKEN(current);
if (!ParseIdentifier(id, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
2020-10-07 20:11:18 +00:00
inherits.push_back(id);
PROGRESS_TOKEN(current);
}
if (!encounteredError && current->GetKind() != LexTokenKind::OpenCurlyParenthesisSymbol) {
2020-10-07 20:11:18 +00:00
encounteredError = true;
LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
}
// Intentionally don't break so we continue into the inner body statement.
}
case LexTokenKind::OpenCurlyParenthesisSymbol: {
PROGRESS_TOKEN(current);
while (true) {
// Cheapest operation, check first
if (current->GetKind() == LexTokenKind::CloseCurlyParenthesisSymbol) {
PROGRESS_TOKEN(current);
break;
}
const ParsedStatement* statement;
// TODO: Sort by
if (!ParseVirtProp(statement) && !ParseFunc(statement) && !ParseVar(statement) &&
!ParseFuncDef(statement)) {
2020-10-07 20:11:18 +00:00
LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
} else {
2020-10-07 20:11:18 +00:00
body.push_back(statement);
}
}
break;
}
default: throw;
}
out = new ParsedClassStatement(TextSpan(start, current->GetSpan().GetEnd()), identifier, inherits, body);
2020-10-07 20:11:18 +00:00
_currentToken = current;
return true;
2020-10-05 15:45:00 +00:00
}
// Tries to parse `typedef <primitive type | identifier> <identifier> ;` at _currentToken.
//
// Returns false without consuming anything when the current token is not the typedef keyword.
// Otherwise problems are reported through LogError, `out` receives a newly allocated
// ParsedTypeDefStatement and true is returned.
bool Parser::ParseTypeDef(const ParsedStatement*& out) {
    if (_currentToken->GetKind() != LexTokenKind::TypedefKeyword) {
        return false;
    }
    const auto start = _currentToken->GetSpan().GetStart();
    PROGRESS_TOKEN(_currentToken);
    Identifier defineFrom;
    if (!ParsePrimType(defineFrom, _currentToken) && !ParseIdentifier(defineFrom, _currentToken)) {
        LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
    }
    PROGRESS_TOKEN(_currentToken);
    Identifier defineTo;
    if (!ParseIdentifier(defineTo, _currentToken)) {
        LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
    }
    PROGRESS_TOKEN(_currentToken);
    EXPECT_TOKEN(_currentToken, SemicolonSymbol);
    // Take the end of the statement before stepping over the terminator, so the span does not
    // include the token that follows the typedef.
    const auto end = _currentToken->GetSpan().GetEnd();
    if (_currentToken->GetKind() == LexTokenKind::SemicolonSymbol) {
        PROGRESS_TOKEN(_currentToken);
    }
    out = new ParsedTypeDefStatement(TextSpan(start, end), defineTo, defineFrom);
    return true;
}
2020-10-09 09:54:43 +00:00
bool Parser::ParseNamespace(const ParsedStatement*& out) {
if (_currentToken->GetKind() != LexTokenKind::NamespaceKeyword) {
return false;
}
auto start = _currentToken->GetSpan().GetStart();
PROGRESS_TOKEN(_currentToken);
Identifier identifier;
if (!ParseIdentifier(identifier, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
2020-10-09 09:54:43 +00:00
auto script = ParseScript();
auto end = _currentToken->GetSpan().GetEnd();
PROGRESS_TOKEN(_currentToken);
out = new ParsedNamespaceStatement(TextSpan(start, end), identifier, script);
return true;
}
// Tries to parse a function declaration at _currentToken:
//   {'shared' | 'external'} ['private' | 'protected'] [TYPE ['&']] IDENTIFIER PARAMLIST
//       ['const'] {'override' | 'final' | 'explicit' | 'property'} ';'
//
// Returns false, leaving _currentToken untouched, when no identifier followed by a parameter
// list is found. On success `out` receives a newly allocated ParsedFuncStatement and
// _currentToken is moved past the terminating ';'.
// Throws std::logic_error for destructors ('~') and for functions with a statement block, which
// are not implemented yet.
//
// NOTE(review): on the `return false` paths a type statement already produced by ParseType is
// not released.
bool Parser::ParseFunc(const ParsedStatement*& out) {
    const auto start = _currentToken->GetSpan().GetStart();
    // Work on a local cursor until the parameter list has been accepted.
    const auto* token = _currentToken;
    bool isShared = false;
    bool isExternal = false;
    bool modifiers = true;
    while (modifiers) {
        switch (token->GetKind()) {
            case LexTokenKind::SharedKeyword:
                isShared = true;
                PROGRESS_TOKEN(token);
                break;
            case LexTokenKind::ExternalKeyword:
                isExternal = true;
                PROGRESS_TOKEN(token);
                break;
            default: modifiers = false; break;
        }
    }
    AccessModifier accessModifier = AccessModifier::Public;
    if (token->GetKind() == LexTokenKind::PrivateKeyword) {
        accessModifier = AccessModifier::Private;
        PROGRESS_TOKEN(token);
    } else if (token->GetKind() == LexTokenKind::ProtectedKeyword) {
        accessModifier = AccessModifier::Protected;
        PROGRESS_TOKEN(token);
    }
    const ParsedStatement* typeStatement = nullptr;
    bool returnsReference = false;
    if (token->GetKind() == LexTokenKind::TildeSymbol) {
        // TODO: Handle destructor
        throw std::logic_error("not implemented");
    } else if (ParseType(typeStatement, token)) {
        if (token->GetKind() == LexTokenKind::AmpersandSymbol) {
            returnsReference = true;
            PROGRESS_TOKEN(token);
        }
    }
    Identifier identifier;
    if (!ParseIdentifier(identifier, token)) {
        return false;
    }
    PROGRESS_TOKEN(token);
    const ParsedStatement* paramList = nullptr;
    if (!ParseParamList(paramList, token)) {
        return false;
    }
    // From here on this is a function; commit the cursor.
    _currentToken = token;
    bool isConst = false;
    if (_currentToken->GetKind() == LexTokenKind::ConstKeyword) {
        isConst = true;
        PROGRESS_TOKEN(_currentToken);
    }
    bool lookingForFuncAttr = true;
    // Value-initialized so the attribute set starts out empty instead of indeterminate.
    FuncAttr funcAttr{};
    while (lookingForFuncAttr) {
        switch (_currentToken->GetKind()) {
            case LexTokenKind::OverrideKeyword:
                PROGRESS_TOKEN(_currentToken);
                funcAttr = FuncAttrHelpers::Set(funcAttr, FuncAttr::Override);
                break;
            case LexTokenKind::FinalKeyword:
                PROGRESS_TOKEN(_currentToken);
                funcAttr = FuncAttrHelpers::Set(funcAttr, FuncAttr::Final);
                break;
            case LexTokenKind::ExplicitKeyword:
                PROGRESS_TOKEN(_currentToken);
                funcAttr = FuncAttrHelpers::Set(funcAttr, FuncAttr::Explicit);
                break;
            case LexTokenKind::PropertyKeyword:
                PROGRESS_TOKEN(_currentToken);
                funcAttr = FuncAttrHelpers::Set(funcAttr, FuncAttr::Property);
                break;
            default: lookingForFuncAttr = false; break;
        }
    }
    const ParsedStatement* statblock = nullptr;
    if (_currentToken->GetKind() != LexTokenKind::SemicolonSymbol) {
        // TODO: Parse stat block.
        throw std::logic_error("not implemented");
    }
    // The span ends at the ';', which is then consumed so the caller resumes after the
    // declaration.
    const auto end = _currentToken->GetSpan().GetEnd();
    PROGRESS_TOKEN(_currentToken);
    out = new ParsedFuncStatement(TextSpan(start, end), isShared, isExternal, accessModifier, typeStatement,
                                  returnsReference, identifier, paramList, isConst, funcAttr, statblock);
    return true;
}
bool Parser::ParseType(const ParsedStatement*& out, const LexToken*& currentToken) {
const auto* token = currentToken;
auto start = token->GetSpan().GetStart();
bool isConst = false;
bool isArray = false;
bool isHandle = false;
if (token->GetKind() == LexTokenKind::ConstKeyword) {
isConst = true;
PROGRESS_TOKEN(token);
}
ScopedIdentifier scopedIdentifier;
ParseScope(scopedIdentifier.GetScope(), token);
if (!ParseDataType(scopedIdentifier.GetIdentifier(), token)) {
return false;
}
// TODO: Generics.
if (token->GetKind() == LexTokenKind::OpenBlockParenthesisSymbol) {
PROGRESS_TOKEN(token);
if (token->GetKind() != LexTokenKind::CloseBlockParenthesisSymbol) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, token->GetSpan());
} else {
PROGRESS_TOKEN(token);
isArray = true;
}
} else if (token->GetKind() == LexTokenKind::AtSymbol) {
isHandle = true;
PROGRESS_TOKEN(token);
if (token->GetKind() == LexTokenKind::ConstKeyword) {
isConst = true;
PROGRESS_TOKEN(token);
}
}
auto end = token->GetSpan().GetEnd();
currentToken = token;
out = new ParsedTypeStatement(TextSpan(start, end), isConst, isArray, isHandle, scopedIdentifier);
return true;
}
bool Parser::ParseScope(std::vector<Identifier>& scope, const LexToken*& currentToken) {
if (currentToken->GetKind() == LexTokenKind::ColonColonSymbol) {
scope.emplace_back();
PROGRESS_TOKEN(currentToken);
}
Identifier identifier;
if (ParseIdentifier(identifier, currentToken)) {
const auto* n = currentToken->GetNext().get();
currentToken = n;
scope.push_back(identifier);
} else {
return false;
}
while (currentToken != nullptr && currentToken->GetKind() == LexTokenKind::ColonColonSymbol) {
const auto* n = currentToken;
PROGRESS_TOKEN(n);
if (ParseIdentifier(identifier, n)) {
PROGRESS_TOKEN(n);
if (n->GetKind() == LexTokenKind::ColonColonSymbol) {
currentToken = n;
scope.push_back(identifier);
} else {
break;
}
} else {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
break;
}
}
// TODO: Handle generics in script class name.
return true;
}
bool Parser::ParseParamList(const ParsedStatement*& out, const LexToken*& currentToken) {
if (currentToken->GetKind() != LexTokenKind::OpenParenthesisSymbol) {
return false;
}
auto start = currentToken->GetSpan().GetStart();
PROGRESS_TOKEN(currentToken);
std::vector<ParsedParamListStatement::ParsedParameter> parameters;
if (currentToken->GetKind() == LexTokenKind::VoidKeyword) {
PROGRESS_TOKEN(currentToken);
if (currentToken->GetKind() != LexTokenKind::CloseParenthesisSymbol) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
}
PROGRESS_TOKEN(currentToken);
out = new ParsedParamListStatement(TextSpan(start, currentToken->GetSpan().GetEnd()), parameters);
return true;
} else if (currentToken->GetKind() == LexTokenKind::CloseParenthesisSymbol) {
out = new ParsedParamListStatement(TextSpan(start, currentToken->GetSpan().GetEnd()), parameters);
PROGRESS_TOKEN(currentToken);
return true;
}
while (true) {
parameters.emplace_back();
auto parameter = parameters.at(parameters.size() - 1);
if (!ParseType((const ParsedStatement*&)parameter.GetTypeStatement(), currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
}
ParseTypeMod(parameter.GetTypeMod());
ParseIdentifier(parameter.GetIdentifier(), currentToken);
if (currentToken->GetKind() != LexTokenKind::CommaSymbol) {
break;
}
PROGRESS_TOKEN(currentToken);
}
if (currentToken->GetKind() != LexTokenKind::CloseParenthesisSymbol) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
}
out = new ParsedParamListStatement(TextSpan(start, currentToken->GetSpan().GetEnd()), parameters);
PROGRESS_TOKEN(currentToken);
return true;
}
// Tries to read a data type name at `currentToken`: an identifier, `auto`, or a primitive type.
//
// On success the name is written to `out`, `currentToken` is advanced past the token and true
// is returned. Otherwise false is returned and `currentToken` is not moved.
bool Parser::ParseDataType(Identifier& out, const LexToken*& currentToken) {
    const auto tokenKind = currentToken->GetKind();
    if (tokenKind == LexTokenKind::Identifier) {
        out = static_cast<const IdentifierToken*>(currentToken)->GetValue();
    } else if (tokenKind == LexTokenKind::AutoKeyword) {
        out = PrimitiveTypes::AutoName();
    } else if (!ParsePrimType(out, currentToken)) {
        return false;
    }
    // All three accepted forms are exactly one token long.
    PROGRESS_TOKEN(currentToken);
    return true;
}
// Placeholder for virtual property parsing: always reports "no match" and leaves `out` untouched.
bool Parser::ParseVirtProp([[maybe_unused]] const ParsedStatement*& out) {
    return false;
}
// Placeholder for variable parsing: always reports "no match" and leaves `out` untouched.
bool Parser::ParseVar([[maybe_unused]] const ParsedStatement*& out) {
    return false;
}
// Placeholder for funcdef parsing: always reports "no match" and leaves `out` untouched.
bool Parser::ParseFuncDef([[maybe_unused]] const ParsedStatement*& out) {
    return false;
}
// Maps a primitive type keyword token to its type name.
//
// When `token` is one of the primitive type keywords, the matching PrimitiveTypes name is
// written to `out` and true is returned. For any other token false is returned and `out` is not
// modified. The token itself is never advanced.
bool Parser::ParsePrimType(Identifier& out, const LexToken*& token) {
    switch (token->GetKind()) {
        case LexTokenKind::VoidKeyword: out = PrimitiveTypes::VoidName(); return true;
        case LexTokenKind::IntKeyword: out = PrimitiveTypes::IntName(); return true;
        case LexTokenKind::Int8Keyword: out = PrimitiveTypes::Int8Name(); return true;
        case LexTokenKind::Int16Keyword: out = PrimitiveTypes::Int16Name(); return true;
        case LexTokenKind::Int32Keyword: out = PrimitiveTypes::Int32Name(); return true;
        case LexTokenKind::Int64Keyword: out = PrimitiveTypes::Int64Name(); return true;
        // Every case must assign `out`; returning true without assigning would hand the caller
        // a stale name.
        case LexTokenKind::UintKeyword: out = PrimitiveTypes::UintName(); return true;
        case LexTokenKind::Uint8Keyword: out = PrimitiveTypes::Uint8Name(); return true;
        case LexTokenKind::Uint16Keyword: out = PrimitiveTypes::Uint16Name(); return true;
        case LexTokenKind::Uint32Keyword: out = PrimitiveTypes::Uint32Name(); return true;
        case LexTokenKind::Uint64Keyword: out = PrimitiveTypes::Uint64Name(); return true;
        case LexTokenKind::FloatKeyword: out = PrimitiveTypes::FloatName(); return true;
        case LexTokenKind::DoubleKeyword: out = PrimitiveTypes::DoubleName(); return true;
        case LexTokenKind::BoolKeyword: out = PrimitiveTypes::BoolName(); return true;
        default: return false;
    }
}
// Tries to parse a reference type modifier at _currentToken: '&' ['in' | 'out' | 'inout'].
//
// Returns false, leaving `typeMod` and _currentToken untouched, when the current token is not
// '&'. Otherwise the '&' and an optional direction keyword are consumed, `typeMod` is set
// (a bare '&' counts as RefInOut) and true is returned.
bool Parser::ParseTypeMod(TypeMod& typeMod) {
    if (_currentToken->GetKind() != LexTokenKind::AmpersandSymbol) {
        return false;
    }
    PROGRESS_TOKEN(_currentToken);

    const auto directionKind = _currentToken->GetKind();
    if (directionKind == LexTokenKind::InKeyword) {
        typeMod = TypeMod::RefIn;
    } else if (directionKind == LexTokenKind::OutKeyword) {
        typeMod = TypeMod::RefOut;
    } else {
        typeMod = TypeMod::RefInOut;
        if (directionKind != LexTokenKind::InoutKeyword) {
            // Bare '&': there is no direction keyword to consume.
            return true;
        }
    }
    // Step over the direction keyword.
    PROGRESS_TOKEN(_currentToken);
    return true;
}
2020-10-05 15:45:00 +00:00
}