Initial work on parsing.

This commit is contained in:
2020-10-07 22:11:18 +02:00
parent f299d5183f
commit 2036f1ce43
10 changed files with 276 additions and 55 deletions

View File

@@ -2,13 +2,11 @@
#define MALACHSCRIPT_LEXTOKEN_HPP
#include <memory>
#include <utility>
#include "LexTokenKind.hpp"
#include "../../TextSpan.hpp"
namespace MalachScript::Parser {
class LexToken {
friend class Lexer;
std::unique_ptr<const LexToken> _next;
TextSpan _span;
@@ -18,12 +16,16 @@ namespace MalachScript::Parser {
/// Returns the kind of this token. Pure virtual: every concrete token type supplies it.
[[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0;
/// Returns a reference to the owning pointer that holds the token following this one.
/// Ownership stays with this token; the caller only observes the pointer.
[[nodiscard]] const std::unique_ptr<const LexToken>& GetNext() const noexcept { return _next; }
/// Returns the text span stored for this token.
[[nodiscard]] const TextSpan& GetSpan() const noexcept { return _span; }
/// Attaches `token` as the token following this one and takes ownership of it.
/// A previously attached token, if any, is destroyed.
void SetNext(LexToken* token) { _next.reset(token); }
};
/// Token type whose kind is fixed at compile time by the `kind` template argument.
template <LexTokenKind kind> class LexTokenImpl : public LexToken {
public:
/// Forwards the source span to the LexToken base.
LexTokenImpl(TextSpan span) : LexToken(span){};
// NOTE(review): this diff view lists two GetKind definitions back to back; presumably the
// `override` line is the removed one and the `final` line replaces it. Both return `kind`.
[[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; }
[[nodiscard]] LexTokenKind GetKind() const noexcept final { return kind; }
};
class IntegerLiteral : public LexTokenImpl<LexTokenKind::IntegerLiteral> {
@@ -53,12 +55,12 @@ namespace MalachScript::Parser {
};
/// Identifier token carrying the identifier's text.
class IdentifierToken : public LexTokenImpl<LexTokenKind::Identifier> {
// NOTE(review): this diff view lists two `_value` declarations back to back; presumably the
// owning std::u8string is the removed line and the std::u8string_view is its replacement.
std::u8string _value;
// A string_view does not own its characters: the buffer it points into must stay alive for
// as long as this token is used. TODO confirm the script text outlives the token list.
std::u8string_view _value;
public:
/// Owning variant: moves `value` into the token.
IdentifierToken(TextSpan span, std::u8string value)
: LexTokenImpl<LexTokenKind::Identifier>(span), _value(std::move(value)) {}
[[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; }
/// View variant: stores `value` as given, without copying the characters.
IdentifierToken(TextSpan span, std::u8string_view value)
: LexTokenImpl<LexTokenKind::Identifier>(span), _value(value) {}
/// Returns the identifier text held by this token.
[[nodiscard]] const std::u8string_view& GetValue() const noexcept { return _value; }
};
}

View File

@@ -12,7 +12,7 @@ namespace MalachScript::Parser {
auto* last = first;
while (true) {
auto* next = LexNext();
last->_next = std::unique_ptr<const LexToken>(next);
last->SetNext(next);
last = next;
if (next->GetKind() == LexTokenKind::EndOfFile) {
break;
@@ -459,8 +459,21 @@ namespace MalachScript::Parser {
}
/// Compile-time hash of a NUL-terminated UTF-8 string:
/// Hash(s) = s[0] + 33 * Hash(s + 1), and 5381 for the empty string.
/// It is evaluated in `case` labels, so it must stay usable in constant expressions.
static uint32_t constexpr Hash(const char8_t* input) {
// NOTE(review): this diff view lists the single-expression form and the if/else form
// together; presumably the ternary line is the removed one. Both compute the same value.
return *input != 0U ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381;
if (*input != 0U) {
// Recurse on the tail first, so the last character is the first one mixed into the seed.
return static_cast<uint32_t>(*input) + 33 * Hash(input + 1);
} else {
// End of string: seed value of the hash.
return 5381;
}
};
/// Hashes a UTF-8 string view at run time.
///
/// Starts from the seed 5381 and, walking the characters from last to first, computes
/// hash = hash * 33 + character. This is the iterative form of the recurrence used by the
/// constexpr Hash() for NUL-terminated literals, so both yield the same value for the same
/// text (as long as the view contains no embedded NUL character).
///
/// @param sv The text to hash; may be empty, in which case the seed 5381 is returned.
/// @return The 32-bit hash, computed modulo 2^32.
static uint32_t HashStringView(const std::u8string_view& sv) {
    // The accumulator must be unsigned: with `auto init = 5381` it was deduced as `int`, and
    // multiplying by 33 overflows a signed int after a few characters, which is undefined
    // behaviour. uint32_t wraps modulo 2^32, matching the arithmetic Hash() performs.
    uint32_t hash = 5381;
    for (auto it = sv.rbegin(); it != sv.rend(); ++it) {
        hash = hash * 33 + static_cast<uint32_t>(*it);
    }
    return hash;
}
LexToken* Lexer::LexKeywordOrIdentifier() {
auto start = _position;
@@ -468,9 +481,9 @@ namespace MalachScript::Parser {
while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
offset++;
}
auto str = std::u8string(_script.substr(start, offset));
auto str = _script.substr(start, offset);
Progress(offset - 1);
switch (Hash(str.c_str())) {
switch (HashStringView(str)) {
case Hash(u8"and"): return Create<LexTokenImpl<LexTokenKind::AndKeyword>>(TextSpan(start, _position));
case Hash(u8"abstract"):
return Create<LexTokenImpl<LexTokenKind::AbstractKeyword>>(TextSpan(start, _position));