// MalachScript/src/Parser/Parser.cpp
//
// 554 lines
// 24 KiB
// C++

#include "Parser.hpp"
#include <iostream>
#include "../CoreData/PrimitiveTypes.hpp"
// Advances `token` to the next token and then keeps advancing while the
// token it lands on is whitespace. `token` must be an assignable token pointer.
//
// The body is wrapped in do { ... } while (false) so the macro expands to
// exactly one statement and stays correct inside an unbraced if/else.
//
// NOTE(review): nothing here guards against GetNext() yielding a null pointer;
// presumably the lexer always terminates the stream with a non-whitespace
// token - confirm before calling this on the last token.
#define PROGRESS_TOKEN(token)                                                                                          \
    do {                                                                                                               \
        (token) = (token)->GetNext().get();                                                                            \
        while ((token)->GetKind() == LexTokenKind::Whitespace) {                                                       \
            (token) = (token)->GetNext().get();                                                                        \
        }                                                                                                              \
    } while (false)
// Logs an UnexpectedToken diagnostic at `token`'s span when `token` is not of
// LexTokenKind::`kind`. The token is never advanced by this macro.
//
// Wrapped in do { ... } while (false) so that the macro is a single statement
// and a following `else` cannot bind to the macro's internal `if`.
#define EXPECT_TOKEN(token, kind)                                                                                      \
    do {                                                                                                               \
        if ((token)->GetKind() != LexTokenKind::kind) {                                                                \
            LogError(Diagnostics::DiagnosticType::UnexpectedToken, (token)->GetSpan());                                \
        }                                                                                                              \
    } while (false)
namespace MalachScript::Parser {
// Public entry point: hands over to ParseScript() and returns its result unchanged.
const ParsedScriptStatement* Parser::Parse() {
    const ParsedScriptStatement* script = ParseScript();
    return script;
}
// Parses statements starting at _currentToken until an EndOfFile token is
// reached and returns them wrapped in a newly allocated ParsedScriptStatement.
//
// Each iteration tries a class, then a function, then a namespace. When none
// of them matches, the offending token is skipped so that parsing can resume
// at the next one.
//
// The returned span runs from 0 to the end of the last parsed statement, or
// is empty (0, 0) when nothing was parsed.
const ParsedScriptStatement* Parser::ParseScript() {
    std::vector<const ParsedStatement*> statements;
    statements.reserve(32);
    while (true) {
        // Step over leading whitespace by hand; PROGRESS_TOKEN would first skip
        // the current token, which may be meaningful.
        while (_currentToken->GetKind() == LexTokenKind::Whitespace) {
            _currentToken = _currentToken->GetNext().get();
        }
        if (_currentToken->GetKind() == LexTokenKind::EndOfFile) {
            break;
        }
        // Initialised so a parser that bails out never leaves an indeterminate pointer behind.
        const ParsedStatement* statement = nullptr;
        const bool parsed = ParseClass(statement) || ParseFunc(statement) || ParseNamespace(statement);
        if (!parsed) {
            // TODO: Log error
            PROGRESS_TOKEN(_currentToken);
            continue;
        }
        statements.push_back(statement);
    }
    // Let the span type follow GetEnd() instead of forcing it through an int.
    const auto end = statements.empty() ? 0 : statements.back()->GetSpan().GetEnd();
    return new ParsedScriptStatement(TextSpan(0, end), statements);
}
bool Parser::ParseClass(const ParsedStatement*& out) {
const auto* current = _currentToken;
auto start = current->GetSpan().GetStart();
bool lookingForClass = true;
while (lookingForClass) {
switch (current->GetKind()) {
case LexTokenKind::SharedKeyword: break;
case LexTokenKind::AbstractKeyword: break;
case LexTokenKind::FinalKeyword: break;
case LexTokenKind::ExternalKeyword: break;
case LexTokenKind::ClassKeyword: lookingForClass = false; break;
default: return false;
}
PROGRESS_TOKEN(current);
}
// After class keyword, an identifier should always follow, if it doesn't, log an error.
Identifier identifier;
if (!ParseIdentifier(identifier, current)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
PROGRESS_TOKEN(current);
std::vector<Identifier> inherits;
std::vector<const ParsedStatement*> body;
body.reserve(16);
switch (current->GetKind()) {
case LexTokenKind::SemicolonSymbol: {
PROGRESS_TOKEN(current);
break;
}
case LexTokenKind::ColonSymbol: {
PROGRESS_TOKEN(current);
Identifier id;
if (!ParseIdentifier(id, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
inherits.push_back(id);
while (current->GetKind() == LexTokenKind::CommaSymbol) {
PROGRESS_TOKEN(current);
if (!ParseIdentifier(id, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
inherits.push_back(id);
PROGRESS_TOKEN(current);
}
if (current->GetKind() != LexTokenKind::OpenCurlyParenthesisSymbol) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
}
[[fallthrough]];
// Intentionally don't break so we continue into the inner body statement.
}
case LexTokenKind::OpenCurlyParenthesisSymbol: {
PROGRESS_TOKEN(current);
while (true) {
// Cheapest operation, check first
if (current->GetKind() == LexTokenKind::CloseCurlyParenthesisSymbol) {
PROGRESS_TOKEN(current);
break;
}
const ParsedStatement* statement = nullptr;
// TODO: Sort by complexity
if (!ParseVirtProp(statement, current) && !ParseFunc(statement) && !ParseVar(statement) &&
!ParseFuncDef(statement)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
break;
} else {
body.push_back(statement);
}
}
break;
}
default: throw;
}
out = new ParsedClassStatement(TextSpan(start, current->GetSpan().GetEnd()), identifier, inherits, body);
_currentToken = current;
return true;
}
bool Parser::ParseTypeDef(const ParsedStatement*& out) {
if (_currentToken->GetKind() != LexTokenKind::TypedefKeyword) {
return false;
}
auto start = _currentToken->GetSpan().GetStart();
PROGRESS_TOKEN(_currentToken);
Identifier defineFrom;
if (!ParsePrimType(defineFrom, _currentToken) && !ParseIdentifier(defineFrom, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
PROGRESS_TOKEN(_currentToken);
Identifier defineTo;
if (!ParseIdentifier(defineTo, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
PROGRESS_TOKEN(_currentToken);
EXPECT_TOKEN(_currentToken, SemicolonSymbol);
PROGRESS_TOKEN(_currentToken);
out = new ParsedTypeDefStatement(TextSpan(start, _currentToken->GetSpan().GetEnd()), defineTo, defineFrom);
return true;
}
bool Parser::ParseNamespace(const ParsedStatement*& out) {
if (_currentToken->GetKind() != LexTokenKind::NamespaceKeyword) {
return false;
}
auto start = _currentToken->GetSpan().GetStart();
PROGRESS_TOKEN(_currentToken);
Identifier identifier;
if (!ParseIdentifier(identifier, _currentToken)) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, _currentToken->GetSpan());
}
const auto* script = ParseScript();
auto end = _currentToken->GetSpan().GetEnd();
PROGRESS_TOKEN(_currentToken);
out = new ParsedNamespaceStatement(TextSpan(start, end), identifier, script);
return true;
}
// Tries to parse a function declaration starting at _currentToken:
//   {shared|external} [private|protected] [type [&]] identifier paramlist [const] funcattr ;
//
// Returns false, with _currentToken untouched and every intermediate node
// freed, when no identifier or no parameter list is found. On success `out`
// receives a new ParsedFuncStatement whose span ends at the closing
// semicolon, and _currentToken is moved past that semicolon.
//
// Throws std::logic_error("not implemented") for destructors (`~`) and for
// functions that have a body instead of a terminating semicolon.
bool Parser::ParseFunc(const ParsedStatement*& out) {
    auto start = _currentToken->GetSpan().GetStart();
    // Look ahead on a local cursor; _currentToken is only committed once the
    // parameter list has been parsed.
    const auto* token = _currentToken;
    bool isShared = false;
    bool isExternal = false;
    bool modifiers = true;
    while (modifiers) {
        switch (token->GetKind()) {
            case LexTokenKind::SharedKeyword:
                isShared = true;
                PROGRESS_TOKEN(token);
                continue;
            case LexTokenKind::ExternalKeyword:
                isExternal = true;
                PROGRESS_TOKEN(token);
                continue;
            default: modifiers = false; break;
        }
    }
    AccessModifier accessModifier = AccessModifier::Public;
    if (token->GetKind() == LexTokenKind::PrivateKeyword) {
        accessModifier = AccessModifier::Private;
        PROGRESS_TOKEN(token);
    } else if (token->GetKind() == LexTokenKind::ProtectedKeyword) {
        accessModifier = AccessModifier::Protected;
        PROGRESS_TOKEN(token);
    }
    const ParsedStatement* typeStatement = nullptr;
    bool returnsReference = false;
    if (token->GetKind() == LexTokenKind::TildeSymbol) {
        // TODO: Handle destructor
        throw std::logic_error("not implemented");
    } else if (ParseType(typeStatement, token)) {
        if (token->GetKind() == LexTokenKind::AmpersandSymbol) {
            returnsReference = true;
            PROGRESS_TOKEN(token);
        }
    }
    Identifier identifier;
    if (!ParseIdentifier(identifier, token)) {
        // This function is tried speculatively; do not leak the type node
        // every time the input turns out not to be a function.
        delete typeStatement;
        return false;
    }
    PROGRESS_TOKEN(token);
    const ParsedStatement* paramList = nullptr;
    if (!ParseParamList(paramList, token)) {
        delete typeStatement;
        return false;
    }
    _currentToken = token;
    bool isConst = false;
    if (_currentToken->GetKind() == LexTokenKind::ConstKeyword) {
        isConst = true;
        PROGRESS_TOKEN(_currentToken);
    }
    FuncAttr funcAttr = FuncAttr::None;
    ParseFuncAttr(funcAttr, _currentToken);
    const ParsedStatement* statblock = nullptr;
    if (_currentToken->GetKind() != LexTokenKind::SemicolonSymbol) {
        // TODO: Parse stat block.
        delete typeStatement;
        delete paramList;
        throw std::logic_error("not implemented");
    }
    out = new ParsedFuncStatement(TextSpan(start, _currentToken->GetSpan().GetEnd()), isShared, isExternal,
                                  accessModifier, typeStatement, returnsReference, identifier, paramList, isConst,
                                  funcAttr, statblock);
    // Consume the terminating semicolon so the caller resumes at the next statement.
    PROGRESS_TOKEN(_currentToken);
    return true;
}
bool Parser::ParseType(const ParsedStatement*& out, const LexToken*& currentToken) {
const auto* token = currentToken;
auto start = token->GetSpan().GetStart();
bool isConst = false;
bool isArray = false;
bool isHandle = false;
if (token->GetKind() == LexTokenKind::ConstKeyword) {
isConst = true;
PROGRESS_TOKEN(token);
}
ScopedIdentifier scopedIdentifier;
ParseScope(scopedIdentifier.GetScope(), token);
if (!ParseDataType(scopedIdentifier.GetIdentifier(), token)) {
return false;
}
// TODO: Generics.
if (token->GetKind() == LexTokenKind::OpenBlockParenthesisSymbol) {
PROGRESS_TOKEN(token);
if (token->GetKind() != LexTokenKind::CloseBlockParenthesisSymbol) {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, token->GetSpan());
} else {
PROGRESS_TOKEN(token);
isArray = true;
}
} else if (token->GetKind() == LexTokenKind::AtSymbol) {
isHandle = true;
PROGRESS_TOKEN(token);
if (token->GetKind() == LexTokenKind::ConstKeyword) {
isConst = true;
PROGRESS_TOKEN(token);
}
}
auto end = token->GetSpan().GetEnd();
currentToken = token;
out = new ParsedTypeStatement(TextSpan(start, end), isConst, isArray, isHandle, scopedIdentifier);
return true;
}
bool Parser::ParseScope(std::vector<Identifier>& scope, const LexToken*& currentToken) {
const auto* current = currentToken;
if (current->GetKind() == LexTokenKind::ColonColonSymbol) {
scope.emplace_back();
PROGRESS_TOKEN(current);
}
Identifier identifier;
if (ParseIdentifier(identifier, current)) {
PROGRESS_TOKEN(current);
scope.push_back(identifier);
if (current->GetKind() != LexTokenKind::ColonColonSymbol){
return false;
}
} else {
return false;
}
while (current != nullptr && current->GetKind() == LexTokenKind::ColonColonSymbol) {
PROGRESS_TOKEN(current);
const auto* n = current;
if (ParseIdentifier(identifier, n)) {
PROGRESS_TOKEN(n);
if (n->GetKind() == LexTokenKind::ColonColonSymbol) {
current = n;
scope.push_back(identifier);
} else {
break;
}
} else {
LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
break;
}
}
// TODO: Handle generics in script class name.
currentToken = current;
return true;
}
// Consumes any run of `override` / `final` / `explicit` / `property` keywords
// at `currentToken` and sets the matching flag in `out` for each one, via
// FuncAttrHelpers::Set.
//
// Always returns true; `currentToken` ends up on the first token that is not
// one of those keywords.
bool Parser::ParseFuncAttr(FuncAttr& out, const LexToken*& currentToken) {
    const auto* cursor = currentToken;
    for (;;) {
        FuncAttr flag = FuncAttr::None;
        switch (cursor->GetKind()) {
            case LexTokenKind::OverrideKeyword: flag = FuncAttr::Override; break;
            case LexTokenKind::FinalKeyword: flag = FuncAttr::Final; break;
            case LexTokenKind::ExplicitKeyword: flag = FuncAttr::Explicit; break;
            case LexTokenKind::PropertyKeyword: flag = FuncAttr::Property; break;
            default:
                // First token that is not an attribute keyword: hand the cursor back.
                currentToken = cursor;
                return true;
        }
        PROGRESS_TOKEN(cursor);
        out = FuncAttrHelpers::Set(out, flag);
    }
}
// Tries to parse a parameter list at `currentToken`:
//   '(' [ 'void' | type typemod [identifier] { ',' type typemod [identifier] } ] ')'
//
// Returns false without consuming anything when the current token is not `(`.
// Otherwise always returns true and stores a new ParsedParamListStatement in
// `out`; its span runs from the `(` to the token the list ends on.
//
// Error recovery: when the list is not followed by `)`, every token up to the
// next `)`, `;` or EndOfFile is reported as UnexpectedToken. A closing `)` is
// consumed; a `;` or EndOfFile is left for the caller.
bool Parser::ParseParamList(const ParsedStatement*& out, const LexToken*& currentToken) {
    if (currentToken->GetKind() != LexTokenKind::OpenParenthesisSymbol) {
        return false;
    }
    auto start = currentToken->GetSpan().GetStart();
    PROGRESS_TOKEN(currentToken);
    std::vector<const ParsedParamListStatement::ParsedParameter*> parameters;
    if (currentToken->GetKind() == LexTokenKind::VoidKeyword) {
        // `(void)`: an explicitly empty list. Anything but `)` after it is
        // handled by the recovery loop below.
        PROGRESS_TOKEN(currentToken);
    } else if (currentToken->GetKind() != LexTokenKind::CloseParenthesisSymbol) {
        while (true) {
            const ParsedStatement* typeStatement = nullptr;
            TypeMod typeMod = TypeMod::None;
            Identifier identifier;
            const ParsedExpression* defaultExpression = nullptr;
            if (!ParseType(typeStatement, currentToken)) {
                LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
            }
            ParseTypeMod(typeMod, currentToken);
            // The parameter name is optional; only step over it when one is
            // present, otherwise the `,` or `)` after the type would be swallowed.
            if (ParseIdentifier(identifier, currentToken)) {
                PROGRESS_TOKEN(currentToken);
            }
            // TODO: Default expression
            parameters.push_back(new ParsedParamListStatement::ParsedParameter(
                dynamic_cast<const ParsedTypeStatement*>(typeStatement), typeMod, identifier, defaultExpression));
            if (currentToken->GetKind() != LexTokenKind::CommaSymbol) {
                break;
            }
            PROGRESS_TOKEN(currentToken);
        }
    }
    while (currentToken->GetKind() != LexTokenKind::CloseParenthesisSymbol) {
        LogError(Diagnostics::DiagnosticType::UnexpectedToken, currentToken->GetSpan());
        if (currentToken->GetKind() == LexTokenKind::EndOfFile) {
            break;
        }
        if (currentToken->GetKind() == LexTokenKind::SemicolonSymbol) {
            break;
        }
        PROGRESS_TOKEN(currentToken);
    }
    // Take the span before stepping over `)` so that every path ends the list on the same token.
    out = new ParsedParamListStatement(TextSpan(start, currentToken->GetSpan().GetEnd()), parameters);
    if (currentToken->GetKind() == LexTokenKind::CloseParenthesisSymbol) {
        PROGRESS_TOKEN(currentToken);
    }
    return true;
}
// Tries to read a data type name at `currentToken`: an identifier, the `auto`
// keyword, or a primitive type keyword.
//
// On a match `out` receives the name, `currentToken` is moved past the token
// and true is returned. Otherwise returns false and `currentToken` stays put.
bool Parser::ParseDataType(Identifier& out, const LexToken*& currentToken) {
    const auto kind = currentToken->GetKind();
    bool matched = false;
    if (kind == LexTokenKind::Identifier) {
        // The kind check above is what justifies the downcast.
        out = static_cast<const IdentifierToken*>(currentToken)->GetValue();
        matched = true;
    } else if (kind == LexTokenKind::AutoKeyword) {
        out = PrimitiveTypes::AutoName();
        matched = true;
    } else {
        matched = ParsePrimType(out, currentToken);
    }
    if (matched) {
        PROGRESS_TOKEN(currentToken);
    }
    return matched;
}
bool Parser::ParseVirtProp([[maybe_unused]] const ParsedStatement*& out, const LexToken*& currentToken) {
AccessModifier access = AccessModifier::Public;
const auto* current = currentToken;
if (current->GetKind() == LexTokenKind::PrivateKeyword) {
access = AccessModifier::Private;
PROGRESS_TOKEN(current);
} else if (current->GetKind() == LexTokenKind::ProtectedKeyword) {
access = AccessModifier::Protected;
PROGRESS_TOKEN(current);
}
const ParsedStatement* typeStatement = nullptr;
if (!ParseType(typeStatement, current)) {
return false;
}
bool ref = false;
if (current->GetKind() == LexTokenKind::AmpersandSymbol) {
ref = true;
PROGRESS_TOKEN(current);
}
Identifier identifier;
if (!ParseIdentifier(identifier, current)) {
delete typeStatement;
return false;
}
PROGRESS_TOKEN(current);
if (current->GetKind() != LexTokenKind::OpenCurlyParenthesisSymbol) {
return false;
}
bool hasGet = false;
bool getConst = false;
FuncAttr getAttr = FuncAttr::None;
const ParsedStatement* getStatement = nullptr;
bool hasSet = false;
bool setConst = false;
FuncAttr setAttr = FuncAttr::None;
const ParsedStatement* setStatement = nullptr;
PROGRESS_TOKEN(current);
while (true) {
auto start = current->GetSpan().GetStart();
if (current->GetKind() == LexTokenKind::GetKeyword) {
PROGRESS_TOKEN(current);
if (current->GetKind() == LexTokenKind::ConstKeyword) {
getConst = true;
PROGRESS_TOKEN(current);
}
ParseFuncAttr(getAttr, current);
if (current->GetKind() != LexTokenKind::SemicolonSymbol) {
// TODO: Parse stat block.
// if (ParseStatBlock(getStatement, current)){
//
// }
this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
}
PROGRESS_TOKEN(current);
if (hasGet) {
this->LogError(Diagnostics::DiagnosticType::DoubleProperty,
TextSpan(start, current->GetSpan().GetEnd()));
}
hasGet = true;
} else if (current->GetKind() == LexTokenKind::SetKeyword) {
PROGRESS_TOKEN(current);
if (current->GetKind() == LexTokenKind::ConstKeyword) {
setConst = true;
PROGRESS_TOKEN(current);
}
ParseFuncAttr(setAttr, current);
if (current->GetKind() != LexTokenKind::SemicolonSymbol) {
// TODO: Parse stat block.
// if (ParseStatBlock(setStatement, current)){
//
// }
this->LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
}
PROGRESS_TOKEN(current);
if (hasSet) {
this->LogError(Diagnostics::DiagnosticType::DoubleProperty,
TextSpan(start, current->GetSpan().GetEnd()));
}
hasSet = true;
} else {
break;
}
}
if (current->GetKind() != LexTokenKind::CloseCurlyParenthesisSymbol){
LogError(Diagnostics::DiagnosticType::UnexpectedToken, current->GetSpan());
}
else{
PROGRESS_TOKEN(current);
}
out = new ParsedVirtPropStatement(TextSpan(currentToken->GetSpan().GetStart(), current->GetSpan().GetEnd()),
access, typeStatement, ref, identifier, hasGet, getConst, getAttr,
getStatement, hasSet, setConst, setAttr, setStatement);
currentToken = current;
return true;
}
// Placeholder: variable declarations are not parsed yet, so this never
// matches and never touches `out`.
bool Parser::ParseVar([[maybe_unused]] const ParsedStatement*& out) {
    constexpr bool matched = false;
    return matched;
}
// Placeholder: funcdef declarations are not parsed yet, so this never
// matches and never touches `out`.
bool Parser::ParseFuncDef([[maybe_unused]] const ParsedStatement*& out) {
    constexpr bool matched = false;
    return matched;
}
// Maps a primitive type keyword at `token` to its PrimitiveTypes name.
//
// Returns true and writes the name to `out` when `token` is one of the
// primitive keywords listed below; returns false and leaves `out` untouched
// otherwise. The token itself is never advanced here.
bool Parser::ParsePrimType(Identifier& out, const LexToken*& token) {
    switch (token->GetKind()) {
        case LexTokenKind::VoidKeyword:
            out = PrimitiveTypes::VoidName();
            break;
        case LexTokenKind::IntKeyword:
            out = PrimitiveTypes::IntName();
            break;
        case LexTokenKind::Int8Keyword:
            out = PrimitiveTypes::Int8Name();
            break;
        case LexTokenKind::Int16Keyword:
            out = PrimitiveTypes::Int16Name();
            break;
        case LexTokenKind::Int32Keyword:
            out = PrimitiveTypes::Int32Name();
            break;
        case LexTokenKind::Int64Keyword:
            out = PrimitiveTypes::Int64Name();
            break;
        case LexTokenKind::UintKeyword:
            out = PrimitiveTypes::UintName();
            break;
        case LexTokenKind::Uint8Keyword:
            out = PrimitiveTypes::Uint8Name();
            break;
        case LexTokenKind::Uint16Keyword:
            out = PrimitiveTypes::Uint16Name();
            break;
        case LexTokenKind::Uint32Keyword:
            out = PrimitiveTypes::Uint32Name();
            break;
        case LexTokenKind::Uint64Keyword:
            out = PrimitiveTypes::Uint64Name();
            break;
        case LexTokenKind::FloatKeyword:
            out = PrimitiveTypes::FloatName();
            break;
        case LexTokenKind::DoubleKeyword:
            out = PrimitiveTypes::DoubleName();
            break;
        case LexTokenKind::BoolKeyword:
            out = PrimitiveTypes::BoolName();
            break;
        default:
            // Not a primitive type keyword.
            return false;
    }
    return true;
}
// Tries to parse a reference type modifier at `currentToken`:  '&' [in|out|inout]
//
// Returns false without consuming anything when the current token is not `&`.
// Otherwise consumes the `&` plus an optional direction keyword, stores the
// result in `typeMod` and returns true. A bare `&` is stored as RefInOut.
bool Parser::ParseTypeMod(TypeMod& typeMod, const LexToken*& currentToken) {
    const bool startsWithAmpersand = currentToken->GetKind() == LexTokenKind::AmpersandSymbol;
    if (!startsWithAmpersand) {
        return false;
    }
    PROGRESS_TOKEN(currentToken);

    // Without a direction keyword the reference falls back to in-out.
    TypeMod resolved = TypeMod::RefInOut;
    bool hasDirectionKeyword = true;
    switch (currentToken->GetKind()) {
        case LexTokenKind::InKeyword: resolved = TypeMod::RefIn; break;
        case LexTokenKind::OutKeyword: resolved = TypeMod::RefOut; break;
        case LexTokenKind::InoutKeyword: resolved = TypeMod::RefInOut; break;
        default: hasDirectionKeyword = false; break;
    }
    typeMod = resolved;
    if (hasDirectionKeyword) {
        // Only an explicit direction keyword is stepped over.
        PROGRESS_TOKEN(currentToken);
    }
    return true;
}
}