Rework identifier handling, adds typedef statement.

This commit is contained in:
2020-10-08 19:53:02 +02:00
parent e99b1af78d
commit 2fb1b68ead
9 changed files with 295 additions and 119 deletions

View File

@@ -2,8 +2,9 @@
#define MALACHSCRIPT_LEXTOKEN_HPP
#include <memory>
#include "LexTokenKind.hpp"
#include "../../CoreData/Identifier.hpp"
#include "../../TextSpan.hpp"
#include "LexTokenKind.hpp"
namespace MalachScript::Parser {
class LexToken {
@@ -55,12 +56,12 @@ namespace MalachScript::Parser {
};
class IdentifierToken : public LexTokenImpl<LexTokenKind::Identifier> {
std::u8string_view _value;
Identifier _value;
public:
IdentifierToken(TextSpan span, std::u8string_view value)
IdentifierToken(TextSpan span, Identifier value)
: LexTokenImpl<LexTokenKind::Identifier>(span), _value(value) {}
[[nodiscard]] const std::u8string_view& GetValue() const noexcept { return _value; }
[[nodiscard]] const Identifier& GetValue() const noexcept { return _value; }
};
}

View File

@@ -458,23 +458,6 @@ namespace MalachScript::Parser {
return Create<StringLiteral>(TextSpan(start, start + _position), std::u8string(_script.substr(start, offset)));
}
static uint32_t constexpr Hash(const char8_t* input) {
if (*input != 0U) {
return static_cast<uint32_t>(*input) + 33 * Hash(input + 1);
} else {
return 5381;
}
};
static uint32_t HashStringView(const std::u8string_view& sv){
auto init = 5381;
for (auto it = sv.rbegin(); it != sv.rend(); ++it)
{
init *= 33;
init += static_cast<uint32_t>(*it);
}
return init;
}
LexToken* Lexer::LexKeywordOrIdentifier() {
auto start = _position;
auto offset = 0;
@@ -483,87 +466,138 @@ namespace MalachScript::Parser {
}
auto str = _script.substr(start, offset);
Progress(offset - 1);
switch (HashStringView(str)) {
case Hash(u8"and"): return Create<LexTokenImpl<LexTokenKind::AndKeyword>>(TextSpan(start, _position));
case Hash(u8"abstract"):
auto hash = Identifier::HashStringView(str);
switch (hash) {
case Identifier::Hash(u8"and"):
return Create<LexTokenImpl<LexTokenKind::AndKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"abstract"):
return Create<LexTokenImpl<LexTokenKind::AbstractKeyword>>(TextSpan(start, _position));
case Hash(u8"auto"): return Create<LexTokenImpl<LexTokenKind::AutoKeyword>>(TextSpan(start, _position));
case Hash(u8"bool"): return Create<LexTokenImpl<LexTokenKind::BoolKeyword>>(TextSpan(start, _position));
case Hash(u8"break"): return Create<LexTokenImpl<LexTokenKind::BreakKeyword>>(TextSpan(start, _position));
case Hash(u8"case"): return Create<LexTokenImpl<LexTokenKind::CaseKeyword>>(TextSpan(start, _position));
case Hash(u8"cast"): return Create<LexTokenImpl<LexTokenKind::CastKeyword>>(TextSpan(start, _position));
case Hash(u8"catch"): return Create<LexTokenImpl<LexTokenKind::CatchKeyword>>(TextSpan(start, _position));
case Hash(u8"class"): return Create<LexTokenImpl<LexTokenKind::ClassKeyword>>(TextSpan(start, _position));
case Hash(u8"const"): return Create<LexTokenImpl<LexTokenKind::ConstKeyword>>(TextSpan(start, _position));
case Hash(u8"continue"):
case Identifier::Hash(u8"auto"):
return Create<LexTokenImpl<LexTokenKind::AutoKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"bool"):
return Create<LexTokenImpl<LexTokenKind::BoolKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"break"):
return Create<LexTokenImpl<LexTokenKind::BreakKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"case"):
return Create<LexTokenImpl<LexTokenKind::CaseKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"cast"):
return Create<LexTokenImpl<LexTokenKind::CastKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"catch"):
return Create<LexTokenImpl<LexTokenKind::CatchKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"class"):
return Create<LexTokenImpl<LexTokenKind::ClassKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"const"):
return Create<LexTokenImpl<LexTokenKind::ConstKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"continue"):
return Create<LexTokenImpl<LexTokenKind::ContinueKeyword>>(TextSpan(start, _position));
case Hash(u8"default"):
case Identifier::Hash(u8"default"):
return Create<LexTokenImpl<LexTokenKind::DefaultKeyword>>(TextSpan(start, _position));
case Hash(u8"do"): return Create<LexTokenImpl<LexTokenKind::DoKeyword>>(TextSpan(start, _position));
case Hash(u8"double"): return Create<LexTokenImpl<LexTokenKind::DoubleKeyword>>(TextSpan(start, _position));
case Hash(u8"else"): return Create<LexTokenImpl<LexTokenKind::ElseKeyword>>(TextSpan(start, _position));
case Hash(u8"enum"): return Create<LexTokenImpl<LexTokenKind::EnumKeyword>>(TextSpan(start, _position));
case Hash(u8"explicit"):
case Identifier::Hash(u8"do"):
return Create<LexTokenImpl<LexTokenKind::DoKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"double"):
return Create<LexTokenImpl<LexTokenKind::DoubleKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"else"):
return Create<LexTokenImpl<LexTokenKind::ElseKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"enum"):
return Create<LexTokenImpl<LexTokenKind::EnumKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"explicit"):
return Create<LexTokenImpl<LexTokenKind::ExplicitKeyword>>(TextSpan(start, _position));
case Hash(u8"external"):
case Identifier::Hash(u8"external"):
return Create<LexTokenImpl<LexTokenKind::ExternalKeyword>>(TextSpan(start, _position));
case Hash(u8"false"): return Create<LexTokenImpl<LexTokenKind::FalseKeyword>>(TextSpan(start, _position));
case Hash(u8"final"): return Create<LexTokenImpl<LexTokenKind::FinalKeyword>>(TextSpan(start, _position));
case Hash(u8"float"): return Create<LexTokenImpl<LexTokenKind::FloatKeyword>>(TextSpan(start, _position));
case Hash(u8"for"): return Create<LexTokenImpl<LexTokenKind::ForKeyword>>(TextSpan(start, _position));
case Hash(u8"from"): return Create<LexTokenImpl<LexTokenKind::FromKeyword>>(TextSpan(start, _position));
case Hash(u8"funcdef"):
case Identifier::Hash(u8"false"):
return Create<LexTokenImpl<LexTokenKind::FalseKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"final"):
return Create<LexTokenImpl<LexTokenKind::FinalKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"float"):
return Create<LexTokenImpl<LexTokenKind::FloatKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"for"):
return Create<LexTokenImpl<LexTokenKind::ForKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"from"):
return Create<LexTokenImpl<LexTokenKind::FromKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"funcdef"):
return Create<LexTokenImpl<LexTokenKind::FuncdefKeyword>>(TextSpan(start, _position));
case Hash(u8"function"):
case Identifier::Hash(u8"function"):
return Create<LexTokenImpl<LexTokenKind::FunctionKeyword>>(TextSpan(start, _position));
case Hash(u8"get"): return Create<LexTokenImpl<LexTokenKind::GetKeyword>>(TextSpan(start, _position));
case Hash(u8"if"): return Create<LexTokenImpl<LexTokenKind::IfKeyword>>(TextSpan(start, _position));
case Hash(u8"import"): return Create<LexTokenImpl<LexTokenKind::ImportKeyword>>(TextSpan(start, _position));
case Hash(u8"in"): return Create<LexTokenImpl<LexTokenKind::InKeyword>>(TextSpan(start, _position));
case Hash(u8"inout"): return Create<LexTokenImpl<LexTokenKind::InoutKeyword>>(TextSpan(start, _position));
case Hash(u8"int"): return Create<LexTokenImpl<LexTokenKind::IntKeyword>>(TextSpan(start, _position));
case Hash(u8"interface"):
case Identifier::Hash(u8"get"):
return Create<LexTokenImpl<LexTokenKind::GetKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"if"):
return Create<LexTokenImpl<LexTokenKind::IfKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"import"):
return Create<LexTokenImpl<LexTokenKind::ImportKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"in"):
return Create<LexTokenImpl<LexTokenKind::InKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"inout"):
return Create<LexTokenImpl<LexTokenKind::InoutKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"int"):
return Create<LexTokenImpl<LexTokenKind::IntKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"interface"):
return Create<LexTokenImpl<LexTokenKind::InterfaceKeyword>>(TextSpan(start, _position));
case Hash(u8"int8"): return Create<LexTokenImpl<LexTokenKind::Int8Keyword>>(TextSpan(start, _position));
case Hash(u8"int16"): return Create<LexTokenImpl<LexTokenKind::Int16Keyword>>(TextSpan(start, _position));
case Hash(u8"int32"): return Create<LexTokenImpl<LexTokenKind::Int32Keyword>>(TextSpan(start, _position));
case Hash(u8"int64"): return Create<LexTokenImpl<LexTokenKind::Int64Keyword>>(TextSpan(start, _position));
case Hash(u8"is"): return Create<LexTokenImpl<LexTokenKind::IsKeyword>>(TextSpan(start, _position));
case Hash(u8"mixin"): return Create<LexTokenImpl<LexTokenKind::MixinKeyword>>(TextSpan(start, _position));
case Hash(u8"namespace"):
case Identifier::Hash(u8"int8"):
return Create<LexTokenImpl<LexTokenKind::Int8Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"int16"):
return Create<LexTokenImpl<LexTokenKind::Int16Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"int32"):
return Create<LexTokenImpl<LexTokenKind::Int32Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"int64"):
return Create<LexTokenImpl<LexTokenKind::Int64Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"is"):
return Create<LexTokenImpl<LexTokenKind::IsKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"mixin"):
return Create<LexTokenImpl<LexTokenKind::MixinKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"namespace"):
return Create<LexTokenImpl<LexTokenKind::NamespaceKeyword>>(TextSpan(start, _position));
case Hash(u8"not"): return Create<LexTokenImpl<LexTokenKind::NotKeyword>>(TextSpan(start, _position));
case Hash(u8"null"): return Create<LexTokenImpl<LexTokenKind::NullKeyword>>(TextSpan(start, _position));
case Hash(u8"or"): return Create<LexTokenImpl<LexTokenKind::OrKeyword>>(TextSpan(start, _position));
case Hash(u8"out"): return Create<LexTokenImpl<LexTokenKind::OutKeyword>>(TextSpan(start, _position));
case Hash(u8"override"):
case Identifier::Hash(u8"not"):
return Create<LexTokenImpl<LexTokenKind::NotKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"null"):
return Create<LexTokenImpl<LexTokenKind::NullKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"or"):
return Create<LexTokenImpl<LexTokenKind::OrKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"out"):
return Create<LexTokenImpl<LexTokenKind::OutKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"override"):
return Create<LexTokenImpl<LexTokenKind::OverrideKeyword>>(TextSpan(start, _position));
case Hash(u8"private"):
case Identifier::Hash(u8"private"):
return Create<LexTokenImpl<LexTokenKind::PrivateKeyword>>(TextSpan(start, _position));
case Hash(u8"property"):
case Identifier::Hash(u8"property"):
return Create<LexTokenImpl<LexTokenKind::PropertyKeyword>>(TextSpan(start, _position));
case Hash(u8"protected"):
case Identifier::Hash(u8"protected"):
return Create<LexTokenImpl<LexTokenKind::ProtectedKeyword>>(TextSpan(start, _position));
case Hash(u8"return"): return Create<LexTokenImpl<LexTokenKind::ReturnKeyword>>(TextSpan(start, _position));
case Hash(u8"set"): return Create<LexTokenImpl<LexTokenKind::SetKeyword>>(TextSpan(start, _position));
case Hash(u8"shared"): return Create<LexTokenImpl<LexTokenKind::SharedKeyword>>(TextSpan(start, _position));
case Hash(u8"super"): return Create<LexTokenImpl<LexTokenKind::SuperKeyword>>(TextSpan(start, _position));
case Hash(u8"switch"): return Create<LexTokenImpl<LexTokenKind::SwitchKeyword>>(TextSpan(start, _position));
case Hash(u8"this"): return Create<LexTokenImpl<LexTokenKind::ThisKeyword>>(TextSpan(start, _position));
case Hash(u8"true"): return Create<LexTokenImpl<LexTokenKind::TrueKeyword>>(TextSpan(start, _position));
case Hash(u8"try"): return Create<LexTokenImpl<LexTokenKind::TryKeyword>>(TextSpan(start, _position));
case Hash(u8"typedef"):
case Identifier::Hash(u8"return"):
return Create<LexTokenImpl<LexTokenKind::ReturnKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"set"):
return Create<LexTokenImpl<LexTokenKind::SetKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"shared"):
return Create<LexTokenImpl<LexTokenKind::SharedKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"super"):
return Create<LexTokenImpl<LexTokenKind::SuperKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"switch"):
return Create<LexTokenImpl<LexTokenKind::SwitchKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"this"):
return Create<LexTokenImpl<LexTokenKind::ThisKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"true"):
return Create<LexTokenImpl<LexTokenKind::TrueKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"try"):
return Create<LexTokenImpl<LexTokenKind::TryKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"typedef"):
return Create<LexTokenImpl<LexTokenKind::TypedefKeyword>>(TextSpan(start, _position));
case Hash(u8"uint"): return Create<LexTokenImpl<LexTokenKind::UintKeyword>>(TextSpan(start, _position));
case Hash(u8"uint8"): return Create<LexTokenImpl<LexTokenKind::Uint8Keyword>>(TextSpan(start, _position));
case Hash(u8"uint16"): return Create<LexTokenImpl<LexTokenKind::Uint16Keyword>>(TextSpan(start, _position));
case Hash(u8"uint32"): return Create<LexTokenImpl<LexTokenKind::Uint32Keyword>>(TextSpan(start, _position));
case Hash(u8"uint64"): return Create<LexTokenImpl<LexTokenKind::Uint64Keyword>>(TextSpan(start, _position));
case Hash(u8"void"): return Create<LexTokenImpl<LexTokenKind::VoidKeyword>>(TextSpan(start, _position));
case Hash(u8"while"): return Create<LexTokenImpl<LexTokenKind::WhileKeyword>>(TextSpan(start, _position));
case Hash(u8"xor"): return Create<LexTokenImpl<LexTokenKind::XorKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"uint"):
return Create<LexTokenImpl<LexTokenKind::UintKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"uint8"):
return Create<LexTokenImpl<LexTokenKind::Uint8Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"uint16"):
return Create<LexTokenImpl<LexTokenKind::Uint16Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"uint32"):
return Create<LexTokenImpl<LexTokenKind::Uint32Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"uint64"):
return Create<LexTokenImpl<LexTokenKind::Uint64Keyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"void"):
return Create<LexTokenImpl<LexTokenKind::VoidKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"while"):
return Create<LexTokenImpl<LexTokenKind::WhileKeyword>>(TextSpan(start, _position));
case Identifier::Hash(u8"xor"):
return Create<LexTokenImpl<LexTokenKind::XorKeyword>>(TextSpan(start, _position));
default: return Create<IdentifierToken>(TextSpan(start, _position), str);
default: return Create<IdentifierToken>(TextSpan(start, _position), Identifier(str, hash));
}
}
bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {