Support for diagnostics system.

This commit is contained in:
Deukhoofd 2020-10-04 19:38:13 +02:00
parent 20976010d6
commit b6a5e047c2
Signed by: Deukhoofd
GPG Key ID: F63E044490819F6F
13 changed files with 347 additions and 217 deletions

View File

@ -0,0 +1,23 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTIC_HPP
#define ELOHIMSCRIPT_DIAGNOSTIC_HPP
#include "../Parser/TextSpan.hpp"
#include "DiagnosticLevel.hpp"
#include "DiagnosticType.hpp"
namespace ElohimScript::Diagnostics {
class Diagnostic {
DiagnosticLevel _level;
DiagnosticType _type;
TextSpan _span;
public:
inline Diagnostic(DiagnosticLevel level, DiagnosticType type, TextSpan span)
: _level(level), _type(type), _span(span) {}
[[nodiscard]] inline DiagnosticLevel GetLevel() const noexcept { return _level; }
[[nodiscard]] inline DiagnosticType GetType() const noexcept { return _type; }
[[nodiscard]] inline const TextSpan& GetSpan() const noexcept { return _span; }
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTIC_HPP

View File

@ -0,0 +1,15 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP
#define ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP
#include <cstdint>
namespace ElohimScript::Diagnostics {
/// Level attached to a diagnostic. Stored in a single byte.
/// The enumerators take the consecutive values 0 through 4 in declaration order.
enum class DiagnosticLevel : uint8_t {
    Trace = 0,
    Information = 1,
    Warning = 2,
    Error = 3,
    Critical = 4,
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP

View File

@ -0,0 +1,20 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP
#define ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP
#include <cstdint>
#include <string>
namespace ElohimScript::Diagnostics {
/// Identifies which problem a diagnostic reports. Stored in a single byte.
enum class DiagnosticType : uint8_t { UnknownToken, InvalidNumericalBase, ExpectedEndOfString };

/// Static helpers for presenting DiagnosticType values.
class DiagnosticTypeHelper {
public:
    /// Returns the English description of a diagnostic type.
    ///
    /// @param type The diagnostic type to describe.
    /// @return The fixed English text for a named enumerator; for any other
    ///         value, the decimal representation of its underlying number.
    static std::string ToEnglishString(DiagnosticType type) {
        switch (type) {
            case DiagnosticType::UnknownToken: return "Unknown token";
            case DiagnosticType::InvalidNumericalBase: return "Invalid numerical base";
            case DiagnosticType::ExpectedEndOfString: return "Expected end of string";
        }
        // Reached only for values without a named enumerator (e.g. produced by a cast).
        // Widen to unsigned so the value is formatted as a number, not as a character.
        return std::to_string(static_cast<unsigned>(type));
    }
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP

View File

@ -0,0 +1 @@
#include "Diagnostics.hpp"

View File

@ -0,0 +1,25 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICS_HPP
#define ELOHIMSCRIPT_DIAGNOSTICS_HPP
#include <vector>
#include "Diagnostic.hpp"
namespace ElohimScript::Diagnostics {
class Diagnostics {
std::vector<Diagnostic> _messages;
public:
inline void Log(DiagnosticLevel level, DiagnosticType type, TextSpan span) {
_messages.emplace_back(level, type, span);
}
inline void LogTrace(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Trace, type, span); }
inline void LogInfo(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Information, type, span); }
inline void LogWarning(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Warning, type, span); }
inline void LogError(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Error, type, span); }
inline void LogCritical(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Critical, type, span); }
[[nodiscard]] const std::vector<Diagnostic>& GetMessages() const noexcept { return _messages; }
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTICS_HPP

View File

@ -10,16 +10,19 @@ namespace ElohimScript::Parser {
friend class Lexer;
std::unique_ptr<const LexToken> _next;
TextSpan _span;
public:
LexToken(TextSpan span) : _span(span) {}
virtual ~LexToken() = default;
[[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0;
[[nodiscard]] const std::unique_ptr<const LexToken>& GetNext() const noexcept { return _next; }
[[nodiscard]] const TextSpan& GetSpan() const noexcept { return _span; }
};
template <LexTokenKind kind> class LexTokenImpl : public LexToken {
public:
LexTokenImpl() = default;
LexTokenImpl(TextSpan span) : LexToken(span){};
[[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; }
};
@ -27,7 +30,8 @@ namespace ElohimScript::Parser {
uint64_t _value;
public:
IntegerLiteral(uint64_t value) : _value(value) {}
IntegerLiteral(TextSpan span, uint64_t value)
: LexTokenImpl<LexTokenKind::IntegerLiteral>(span), _value(value) {}
[[nodiscard]] uint64_t GetValue() const noexcept { return _value; }
};
@ -35,7 +39,7 @@ namespace ElohimScript::Parser {
double _value;
public:
FloatLiteral(double value) : _value(value) {}
FloatLiteral(TextSpan span, double value) : LexTokenImpl<LexTokenKind::FloatLiteral>(span), _value(value) {}
[[nodiscard]] double GetValue() const noexcept { return _value; }
};
@ -43,15 +47,17 @@ namespace ElohimScript::Parser {
std::u8string _value;
public:
StringLiteral(std::u8string value) : _value(std::move(value)) {}
StringLiteral(TextSpan span, std::u8string value)
: LexTokenImpl<LexTokenKind::StringLiteral>(span), _value(std::move(value)) {}
[[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; }
};
class IdentifierToken : public LexTokenImpl<LexTokenKind::Identifier>{
std::u8string _value;
class IdentifierToken : public LexTokenImpl<LexTokenKind::Identifier> {
std::u8string _value;
public:
IdentifierToken(std::u8string value) : _value(std::move(value)) {}
IdentifierToken(TextSpan span, std::u8string value)
: LexTokenImpl<LexTokenKind::Identifier>(span), _value(std::move(value)) {}
[[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; }
};
}

View File

@ -22,9 +22,10 @@ namespace ElohimScript::Parser {
}
LexToken* Lexer::LexNext() {
auto start = _position;
auto c = Consume();
switch (c) {
case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>();
case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>(TextSpan(start, 1));
case u8'*': {
auto n = Peek();
if (n == u8'*') {
@ -32,177 +33,219 @@ namespace ElohimScript::Parser {
n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>();
// **=
return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>(TextSpan(start, 3));
}
return new LexTokenImpl<LexTokenKind::StarStarSymbol>();
// **
return new LexTokenImpl<LexTokenKind::StarStarSymbol>(TextSpan(start, 2));
}
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>();
// *=
return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::StarSymbol>();
// *
return new LexTokenImpl<LexTokenKind::StarSymbol>(TextSpan(start, 1));
}
case u8'/':
if (Peek() == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>();
// /=
return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::SlashSymbol>();
// /
return new LexTokenImpl<LexTokenKind::SlashSymbol>(TextSpan(start, 1));
case u8'%':
if (Peek() == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>();
// %=
return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::PercentSymbol>();
// %
return new LexTokenImpl<LexTokenKind::PercentSymbol>(TextSpan(start, 1));
case u8'+': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>();
// +=
return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'+') {
Progress();
return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>();
// ++
return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::PlusSymbol>();
// +
return new LexTokenImpl<LexTokenKind::PlusSymbol>(TextSpan(start, 1));
}
case u8'-': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>();
// -=
return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'-') {
Progress();
return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>();
// --
return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::MinusSymbol>();
// -
return new LexTokenImpl<LexTokenKind::MinusSymbol>(TextSpan(start, 1));
}
case u8'<': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>();
// <=
return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'<') {
Progress();
if (Peek() == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>();
// <<=
return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>(TextSpan(start, 3));
}
return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>();
// <<
return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::LessThanSymbol>();
// <
return new LexTokenImpl<LexTokenKind::LessThanSymbol>(TextSpan(start, 1));
}
case u8'>': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>();
// >=
return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'>') {
Progress();
n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>();
// >>=
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>(TextSpan(start, 3));
}
if (n == u8'>') {
Progress();
if (Peek() == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>();
// >>>=
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>(
TextSpan(start, 4));
}
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>();
// >>>
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>(
TextSpan(start, 3));
}
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>();
// >>
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>();
// >
return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>(TextSpan(start, 1));
}
case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>();
case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>();
case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>(TextSpan(start, 1));
case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>(TextSpan(start, 1));
case u8'=': {
if (Peek() == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>();
// ==
return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::EqualsSymbol>();
// =
return new LexTokenImpl<LexTokenKind::EqualsSymbol>(TextSpan(start, 1));
}
case u8'!': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>();
// !=
return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'i' && Peek(2) == u8's') {
Progress(2);
return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>();
// !is
return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>(TextSpan(start, 3));
}
return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>();
// !
return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>(TextSpan(start, 1));
}
case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>();
case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>(TextSpan(start, 1));
case u8':': {
if (Peek() == u8':') {
Progress();
return new LexTokenImpl<LexTokenKind::ColonColonSymbol>();
// ::
return new LexTokenImpl<LexTokenKind::ColonColonSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::ColonSymbol>();
// :
return new LexTokenImpl<LexTokenKind::ColonSymbol>(TextSpan(start, 1));
}
case u8'&': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>();
// &=
return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'&') {
Progress();
return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>();
// &&
return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::AmpersandSymbol>();
// &
return new LexTokenImpl<LexTokenKind::AmpersandSymbol>(TextSpan(start, 1));
}
case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>();
case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>();
case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>();
case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>();
case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>(TextSpan(start, 1));
case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>(TextSpan(start, 1));
case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>(TextSpan(start, 1));
case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>(TextSpan(start, 1));
case u8'|': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>();
// |=
return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>(TextSpan(start, 2));
}
if (n == u8'|') {
Progress();
return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>();
// ||
return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>(TextSpan(start, 2));
}
return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>();
// |
return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>(TextSpan(start, 1));
}
case u8'^': {
auto n = Peek();
if (n == u8'=') {
Progress();
return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>();
// ^=
return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>(TextSpan(start, start + 2));
}
if (n == u8'^') {
Progress();
return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>();
// ^^
return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>(TextSpan(start, start + 2));
}
return new LexTokenImpl<LexTokenKind::CaretSymbol>();
// ^
return new LexTokenImpl<LexTokenKind::CaretSymbol>(TextSpan(start, start + 1));
}
case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>();
case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>();
case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>();
case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>();
case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>();
case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>(TextSpan(start, start + 1));
case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>(TextSpan(start, start + 1));
case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>(TextSpan(start, start + 1));
case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>(TextSpan(start, start + 1));
case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>(TextSpan(start, start + 1));
case u8' ':
case u8'\r':
case u8'\n':
case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>();
case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 1));
// Byte order mark
case u8'\xEF': {
if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') {
Progress(2);
return new LexTokenImpl<LexTokenKind::Whitespace>();
return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 3));
}
}
case u8'0':
@ -226,8 +269,8 @@ namespace ElohimScript::Parser {
default:
if (IsAlphaNumericalOrUnderscore(c))
return LexKeywordOrIdentifier();
// TODO: Log error
return new LexTokenImpl<LexTokenKind::Unknown>();
_diagnostics->LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
return new LexTokenImpl<LexTokenKind::Unknown>(TextSpan(start, start + 1));
}
}
@ -248,7 +291,10 @@ namespace ElohimScript::Parser {
;
case 'b': numericalSystem = 2; break;
default:
// TODO: Log Invalid numerical system
_diagnostics->LogError(Diagnostics::DiagnosticType::InvalidNumericalBase,
TextSpan(_position - 1, _position + 1));
// Set to the largest numerical system, so we can prevent errors down the line.
numericalSystem = 16;
break;
}
}
@ -287,6 +333,7 @@ namespace ElohimScript::Parser {
}
LexToken* Lexer::LexDecimal(uint64_t initial) {
auto start = _position;
uint64_t value = initial;
uint64_t decimalValue = 0;
uint64_t exponentValue = 0;
@ -327,12 +374,13 @@ namespace ElohimScript::Parser {
if (isExponent) {
val *= pow(10, exponentValue);
}
return new FloatLiteral(val);
return new FloatLiteral(TextSpan(start, _position), val);
}
return new IntegerLiteral(value);
return new IntegerLiteral(TextSpan(start, _position), value);
}
IntegerLiteral* Lexer::LexHexadecimal() {
auto start = _position;
uint64_t value = 0;
while (true) {
auto v = LexHexadecimalValue(Peek());
@ -343,9 +391,10 @@ namespace ElohimScript::Parser {
value <<= 4;
value += v;
}
return new IntegerLiteral(value);
return new IntegerLiteral(TextSpan(start, _position), value);
}
IntegerLiteral* Lexer::LexOctal() {
auto start = _position;
uint64_t value = 0;
while (true) {
auto v = LexOctalValue(Peek());
@ -356,9 +405,10 @@ namespace ElohimScript::Parser {
value <<= 3;
value += v;
}
return new IntegerLiteral(value);
return new IntegerLiteral(TextSpan(start, _position), value);
}
IntegerLiteral* Lexer::LexBinary() {
auto start = _position;
uint64_t value = 0;
while (true) {
auto v = LexBinaryValue(Peek());
@ -369,7 +419,7 @@ namespace ElohimScript::Parser {
value <<= 1;
value += v;
}
return new IntegerLiteral(value);
return new IntegerLiteral(TextSpan(start, _position), value);
}
StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) {
Progress();
@ -388,11 +438,13 @@ namespace ElohimScript::Parser {
break;
}
if (current == u8'\0') {
// TODO: Log error
_diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
TextSpan(start, start + offset));
break;
}
if (!heredoc && (current == u8'\n' || current == u8'\r')) {
// TODO: log error
_diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
TextSpan(start, start + offset));
break;
}
offset++;
@ -401,14 +453,15 @@ namespace ElohimScript::Parser {
if (heredoc) {
Progress(2);
}
return new StringLiteral(std::u8string(_script.substr(start, offset)));
return new StringLiteral(TextSpan(start, start + _position), std::u8string(_script.substr(start, offset)));
}
static uint32_t constexpr Hash(const char8_t* input) {
return *input ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381;
return *input != 0U ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381;
};
LexToken* Lexer::LexKeywordOrIdentifier() {
auto start = _position;
auto offset = 0;
while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
offset++;
@ -416,137 +469,75 @@ namespace ElohimScript::Parser {
auto str = _script.substr(_position, offset);
Progress(offset);
switch (Hash(str.data())) {
case Hash(u8"and"):
return new LexTokenImpl<LexTokenKind::AndKeyword>();
case Hash(u8"abstract"):
return new LexTokenImpl<LexTokenKind::AbstractKeyword>();
case Hash(u8"auto"):
return new LexTokenImpl<LexTokenKind::AutoKeyword>();
case Hash(u8"bool"):
return new LexTokenImpl<LexTokenKind::BoolKeyword>();
case Hash(u8"break"):
return new LexTokenImpl<LexTokenKind::BreakKeyword>();
case Hash(u8"case"):
return new LexTokenImpl<LexTokenKind::CaseKeyword>();
case Hash(u8"cast"):
return new LexTokenImpl<LexTokenKind::CastKeyword>();
case Hash(u8"catch"):
return new LexTokenImpl<LexTokenKind::CatchKeyword>();
case Hash(u8"class"):
return new LexTokenImpl<LexTokenKind::ClassKeyword>();
case Hash(u8"const"):
return new LexTokenImpl<LexTokenKind::ConstKeyword>();
case Hash(u8"continue"):
return new LexTokenImpl<LexTokenKind::ContinueKeyword>();
case Hash(u8"default"):
return new LexTokenImpl<LexTokenKind::DefaultKeyword>();
case Hash(u8"do"):
return new LexTokenImpl<LexTokenKind::DoKeyword>();
case Hash(u8"double"):
return new LexTokenImpl<LexTokenKind::DoubleKeyword>();
case Hash(u8"else"):
return new LexTokenImpl<LexTokenKind::ElseKeyword>();
case Hash(u8"enum"):
return new LexTokenImpl<LexTokenKind::EnumKeyword>();
case Hash(u8"explicit"):
return new LexTokenImpl<LexTokenKind::ExplicitKeyword>();
case Hash(u8"external"):
return new LexTokenImpl<LexTokenKind::ExternalKeyword>();
case Hash(u8"false"):
return new LexTokenImpl<LexTokenKind::FalseKeyword>();
case Hash(u8"final"):
return new LexTokenImpl<LexTokenKind::FinalKeyword>();
case Hash(u8"float"):
return new LexTokenImpl<LexTokenKind::FloatKeyword>();
case Hash(u8"for"):
return new LexTokenImpl<LexTokenKind::ForKeyword>();
case Hash(u8"from"):
return new LexTokenImpl<LexTokenKind::FromKeyword>();
case Hash(u8"funcdef"):
return new LexTokenImpl<LexTokenKind::FuncdefKeyword>();
case Hash(u8"function"):
return new LexTokenImpl<LexTokenKind::FunctionKeyword>();
case Hash(u8"get"):
return new LexTokenImpl<LexTokenKind::GetKeyword>();
case Hash(u8"if"):
return new LexTokenImpl<LexTokenKind::IfKeyword>();
case Hash(u8"import"):
return new LexTokenImpl<LexTokenKind::ImportKeyword>();
case Hash(u8"in"):
return new LexTokenImpl<LexTokenKind::InKeyword>();
case Hash(u8"inout"):
return new LexTokenImpl<LexTokenKind::InoutKeyword>();
case Hash(u8"int"):
return new LexTokenImpl<LexTokenKind::IntKeyword>();
case Hash(u8"and"): return new LexTokenImpl<LexTokenKind::AndKeyword>(TextSpan(start, _position));
case Hash(u8"abstract"): return new LexTokenImpl<LexTokenKind::AbstractKeyword>(TextSpan(start, _position));
case Hash(u8"auto"): return new LexTokenImpl<LexTokenKind::AutoKeyword>(TextSpan(start, _position));
case Hash(u8"bool"): return new LexTokenImpl<LexTokenKind::BoolKeyword>(TextSpan(start, _position));
case Hash(u8"break"): return new LexTokenImpl<LexTokenKind::BreakKeyword>(TextSpan(start, _position));
case Hash(u8"case"): return new LexTokenImpl<LexTokenKind::CaseKeyword>(TextSpan(start, _position));
case Hash(u8"cast"): return new LexTokenImpl<LexTokenKind::CastKeyword>(TextSpan(start, _position));
case Hash(u8"catch"): return new LexTokenImpl<LexTokenKind::CatchKeyword>(TextSpan(start, _position));
case Hash(u8"class"): return new LexTokenImpl<LexTokenKind::ClassKeyword>(TextSpan(start, _position));
case Hash(u8"const"): return new LexTokenImpl<LexTokenKind::ConstKeyword>(TextSpan(start, _position));
case Hash(u8"continue"): return new LexTokenImpl<LexTokenKind::ContinueKeyword>(TextSpan(start, _position));
case Hash(u8"default"): return new LexTokenImpl<LexTokenKind::DefaultKeyword>(TextSpan(start, _position));
case Hash(u8"do"): return new LexTokenImpl<LexTokenKind::DoKeyword>(TextSpan(start, _position));
case Hash(u8"double"): return new LexTokenImpl<LexTokenKind::DoubleKeyword>(TextSpan(start, _position));
case Hash(u8"else"): return new LexTokenImpl<LexTokenKind::ElseKeyword>(TextSpan(start, _position));
case Hash(u8"enum"): return new LexTokenImpl<LexTokenKind::EnumKeyword>(TextSpan(start, _position));
case Hash(u8"explicit"): return new LexTokenImpl<LexTokenKind::ExplicitKeyword>(TextSpan(start, _position));
case Hash(u8"external"): return new LexTokenImpl<LexTokenKind::ExternalKeyword>(TextSpan(start, _position));
case Hash(u8"false"): return new LexTokenImpl<LexTokenKind::FalseKeyword>(TextSpan(start, _position));
case Hash(u8"final"): return new LexTokenImpl<LexTokenKind::FinalKeyword>(TextSpan(start, _position));
case Hash(u8"float"): return new LexTokenImpl<LexTokenKind::FloatKeyword>(TextSpan(start, _position));
case Hash(u8"for"): return new LexTokenImpl<LexTokenKind::ForKeyword>(TextSpan(start, _position));
case Hash(u8"from"): return new LexTokenImpl<LexTokenKind::FromKeyword>(TextSpan(start, _position));
case Hash(u8"funcdef"): return new LexTokenImpl<LexTokenKind::FuncdefKeyword>(TextSpan(start, _position));
case Hash(u8"function"): return new LexTokenImpl<LexTokenKind::FunctionKeyword>(TextSpan(start, _position));
case Hash(u8"get"): return new LexTokenImpl<LexTokenKind::GetKeyword>(TextSpan(start, _position));
case Hash(u8"if"): return new LexTokenImpl<LexTokenKind::IfKeyword>(TextSpan(start, _position));
case Hash(u8"import"): return new LexTokenImpl<LexTokenKind::ImportKeyword>(TextSpan(start, _position));
case Hash(u8"in"): return new LexTokenImpl<LexTokenKind::InKeyword>(TextSpan(start, _position));
case Hash(u8"inout"): return new LexTokenImpl<LexTokenKind::InoutKeyword>(TextSpan(start, _position));
case Hash(u8"int"): return new LexTokenImpl<LexTokenKind::IntKeyword>(TextSpan(start, _position));
case Hash(u8"interface"):
return new LexTokenImpl<LexTokenKind::InterfaceKeyword>();
case Hash(u8"int8"):
return new LexTokenImpl<LexTokenKind::Int8Keyword>();
case Hash(u8"int16"):
return new LexTokenImpl<LexTokenKind::Int16Keyword>();
case Hash(u8"int32"):
return new LexTokenImpl<LexTokenKind::Int32Keyword>();
case Hash(u8"int64"):
return new LexTokenImpl<LexTokenKind::Int64Keyword>();
case Hash(u8"is"):
return new LexTokenImpl<LexTokenKind::IsKeyword>();
case Hash(u8"mixin"):
return new LexTokenImpl<LexTokenKind::MixinKeyword>();
return new LexTokenImpl<LexTokenKind::InterfaceKeyword>(TextSpan(start, _position));
case Hash(u8"int8"): return new LexTokenImpl<LexTokenKind::Int8Keyword>(TextSpan(start, _position));
case Hash(u8"int16"): return new LexTokenImpl<LexTokenKind::Int16Keyword>(TextSpan(start, _position));
case Hash(u8"int32"): return new LexTokenImpl<LexTokenKind::Int32Keyword>(TextSpan(start, _position));
case Hash(u8"int64"): return new LexTokenImpl<LexTokenKind::Int64Keyword>(TextSpan(start, _position));
case Hash(u8"is"): return new LexTokenImpl<LexTokenKind::IsKeyword>(TextSpan(start, _position));
case Hash(u8"mixin"): return new LexTokenImpl<LexTokenKind::MixinKeyword>(TextSpan(start, _position));
case Hash(u8"namespace"):
return new LexTokenImpl<LexTokenKind::NamespaceKeyword>();
case Hash(u8"not"):
return new LexTokenImpl<LexTokenKind::NotKeyword>();
case Hash(u8"null"):
return new LexTokenImpl<LexTokenKind::NullKeyword>();
case Hash(u8"or"):
return new LexTokenImpl<LexTokenKind::OrKeyword>();
case Hash(u8"out"):
return new LexTokenImpl<LexTokenKind::OutKeyword>();
case Hash(u8"override"):
return new LexTokenImpl<LexTokenKind::OverrideKeyword>();
case Hash(u8"private"):
return new LexTokenImpl<LexTokenKind::PrivateKeyword>();
case Hash(u8"property"):
return new LexTokenImpl<LexTokenKind::PropertyKeyword>();
return new LexTokenImpl<LexTokenKind::NamespaceKeyword>(TextSpan(start, _position));
case Hash(u8"not"): return new LexTokenImpl<LexTokenKind::NotKeyword>(TextSpan(start, _position));
case Hash(u8"null"): return new LexTokenImpl<LexTokenKind::NullKeyword>(TextSpan(start, _position));
case Hash(u8"or"): return new LexTokenImpl<LexTokenKind::OrKeyword>(TextSpan(start, _position));
case Hash(u8"out"): return new LexTokenImpl<LexTokenKind::OutKeyword>(TextSpan(start, _position));
case Hash(u8"override"): return new LexTokenImpl<LexTokenKind::OverrideKeyword>(TextSpan(start, _position));
case Hash(u8"private"): return new LexTokenImpl<LexTokenKind::PrivateKeyword>(TextSpan(start, _position));
case Hash(u8"property"): return new LexTokenImpl<LexTokenKind::PropertyKeyword>(TextSpan(start, _position));
case Hash(u8"protected"):
return new LexTokenImpl<LexTokenKind::ProtectedKeyword>();
case Hash(u8"return"):
return new LexTokenImpl<LexTokenKind::ReturnKeyword>();
case Hash(u8"set"):
return new LexTokenImpl<LexTokenKind::SetKeyword>();
case Hash(u8"shared"):
return new LexTokenImpl<LexTokenKind::SharedKeyword>();
case Hash(u8"super"):
return new LexTokenImpl<LexTokenKind::SuperKeyword>();
case Hash(u8"switch"):
return new LexTokenImpl<LexTokenKind::SwitchKeyword>();
case Hash(u8"this"):
return new LexTokenImpl<LexTokenKind::ThisKeyword>();
case Hash(u8"true"):
return new LexTokenImpl<LexTokenKind::TrueKeyword>();
case Hash(u8"try"):
return new LexTokenImpl<LexTokenKind::TryKeyword>();
case Hash(u8"typedef"):
return new LexTokenImpl<LexTokenKind::TypedefKeyword>();
case Hash(u8"uint"):
return new LexTokenImpl<LexTokenKind::UintKeyword>();
case Hash(u8"uint8"):
return new LexTokenImpl<LexTokenKind::Uint8Keyword>();
case Hash(u8"uint16"):
return new LexTokenImpl<LexTokenKind::Uint16Keyword>();
case Hash(u8"uint32"):
return new LexTokenImpl<LexTokenKind::Uint32Keyword>();
case Hash(u8"uint64"):
return new LexTokenImpl<LexTokenKind::Uint64Keyword>();
case Hash(u8"void"):
return new LexTokenImpl<LexTokenKind::VoidKeyword>();
case Hash(u8"while"):
return new LexTokenImpl<LexTokenKind::WhileKeyword>();
case Hash(u8"xor"):
return new LexTokenImpl<LexTokenKind::XorKeyword>();
return new LexTokenImpl<LexTokenKind::ProtectedKeyword>(TextSpan(start, _position));
case Hash(u8"return"): return new LexTokenImpl<LexTokenKind::ReturnKeyword>(TextSpan(start, _position));
case Hash(u8"set"): return new LexTokenImpl<LexTokenKind::SetKeyword>(TextSpan(start, _position));
case Hash(u8"shared"): return new LexTokenImpl<LexTokenKind::SharedKeyword>(TextSpan(start, _position));
case Hash(u8"super"): return new LexTokenImpl<LexTokenKind::SuperKeyword>(TextSpan(start, _position));
case Hash(u8"switch"): return new LexTokenImpl<LexTokenKind::SwitchKeyword>(TextSpan(start, _position));
case Hash(u8"this"): return new LexTokenImpl<LexTokenKind::ThisKeyword>(TextSpan(start, _position));
case Hash(u8"true"): return new LexTokenImpl<LexTokenKind::TrueKeyword>(TextSpan(start, _position));
case Hash(u8"try"): return new LexTokenImpl<LexTokenKind::TryKeyword>(TextSpan(start, _position));
case Hash(u8"typedef"): return new LexTokenImpl<LexTokenKind::TypedefKeyword>(TextSpan(start, _position));
case Hash(u8"uint"): return new LexTokenImpl<LexTokenKind::UintKeyword>(TextSpan(start, _position));
case Hash(u8"uint8"): return new LexTokenImpl<LexTokenKind::Uint8Keyword>(TextSpan(start, _position));
case Hash(u8"uint16"): return new LexTokenImpl<LexTokenKind::Uint16Keyword>(TextSpan(start, _position));
case Hash(u8"uint32"): return new LexTokenImpl<LexTokenKind::Uint32Keyword>(TextSpan(start, _position));
case Hash(u8"uint64"): return new LexTokenImpl<LexTokenKind::Uint64Keyword>(TextSpan(start, _position));
case Hash(u8"void"): return new LexTokenImpl<LexTokenKind::VoidKeyword>(TextSpan(start, _position));
case Hash(u8"while"): return new LexTokenImpl<LexTokenKind::WhileKeyword>(TextSpan(start, _position));
case Hash(u8"xor"): return new LexTokenImpl<LexTokenKind::XorKeyword>(TextSpan(start, _position));
default:
return new IdentifierToken(std::u8string(str));
default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str));
}
}
bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {

View File

@ -2,19 +2,22 @@
#define ELOHIMSCRIPT_LEXER_HPP
#include <string_view>
#include "../../Diagnostics/Diagnostics.hpp"
#include "LexToken.hpp"
namespace ElohimScript::Parser {
class Lexer {
public:
Lexer(const char* script) : _script(reinterpret_cast<const char8_t*>(script)) {}
Lexer(const char8_t* script) : _script(script) {}
Lexer(std::u8string_view script) : _script(script) {}
Lexer(const char* script, Diagnostics::Diagnostics* diag)
: Lexer(reinterpret_cast<const char8_t*>(script), diag) {}
Lexer(const char8_t* script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {}
Lexer(std::u8string_view script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {}
const LexToken* Lex();
private:
std::u8string_view _script;
size_t _position = -1;
Diagnostics::Diagnostics* _diagnostics;
inline char8_t Consume() {
if (++_position >= _script.size()) {
@ -23,9 +26,7 @@ namespace ElohimScript::Parser {
return _script[_position];
}
inline void Progress(size_t steps = 1){
_position += steps;
}
inline void Progress(size_t steps = 1) { _position += steps; }
inline char8_t Peek(size_t offset = 1) {
auto pos = _position + offset;

19
src/Parser/TextSpan.hpp Normal file
View File

@ -0,0 +1,19 @@
#ifndef ELOHIMSCRIPT_TEXTSPAN_HPP
#define ELOHIMSCRIPT_TEXTSPAN_HPP
#include <cstddef>
namespace ElohimScript {
/// A pair of positions (start, end) identifying a region of text.
///
/// Both values are stored exactly as given; the class performs no validation
/// and no arithmetic on them.
/// NOTE(review): whether the end position is inclusive or exclusive is not
/// established by this class — confirm against the callers.
class TextSpan {
    size_t _start;
    size_t _end;

public:
    /// @param start Start position, returned unchanged by GetStart().
    /// @param end   End position, returned unchanged by GetEnd(). This is a
    ///              position, not a length: no length-to-end conversion is
    ///              done here, so a caller holding a length must pass
    ///              start + length.
    constexpr TextSpan(size_t start, size_t end) noexcept : _start(start), _end(end) {}

    /// Start position given at construction.
    [[nodiscard]] constexpr size_t GetStart() const noexcept { return _start; }

    /// End position given at construction.
    [[nodiscard]] constexpr size_t GetEnd() const noexcept { return _end; }

    /// Two spans are equal when both their start and end positions match.
    constexpr bool operator==(const TextSpan& rhs) const noexcept {
        return _start == rhs._start && _end == rhs._end;
    }

    /// Negation of operator==.
    constexpr bool operator!=(const TextSpan& rhs) const noexcept { return !(*this == rhs); }
};
}
#endif // ELOHIMSCRIPT_TEXTSPAN_HPP

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define KEYWORD_TEST(script, symbol) \
TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
CHECK(token->GetKind() == LexTokenKind::symbol); \
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \
delete token; \
@ -91,8 +93,10 @@ namespace doctest {
#define IDENTIFIER_TEST(identifier) \
TEST_CASE("Lex identifier " identifier) { \
auto lexer = Lexer(identifier); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(identifier, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::Identifier); \
auto value = ((IdentifierToken*)token)->GetValue(); \
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(identifier))); \

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define INTEGER_TEST(script, expected) \
TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::IntegerLiteral); \
auto value = ((const IntegerLiteral*)token)->GetValue(); \
CHECK(value == (expected)); \
@ -16,8 +18,10 @@ using namespace ElohimScript::Parser;
#define FLOAT_TEST(script, expected) \
TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::FloatLiteral); \
auto value = ((const FloatLiteral*)token)->GetValue(); \
CHECK(value == (expected)); \
@ -63,4 +67,16 @@ INTEGER_TEST("0b1111", 15);
INTEGER_TEST("0b110011", 51);
#undef INTEGER_TEST
#undef FLOAT_TEST
#undef FLOAT_TEST
// Lexing "0f553" is expected to report exactly one diagnostic: an Error-level
// InvalidNumericalBase message whose span equals TextSpan(0, 2).
TEST_CASE("Lex invalid numerical base") {
    namespace Diag = ElohimScript::Diagnostics;

    Diag::Diagnostics diagnostics;
    Lexer lexer("0f553", &diagnostics);
    const auto* firstToken = lexer.Lex();

    const auto& reported = diagnostics.GetMessages();
    // Abort the test case here if the count is wrong, so the index below stays valid.
    REQUIRE(reported.size() == 1);

    const auto& onlyMessage = reported[0];
    CHECK(onlyMessage.GetType() == Diag::DiagnosticType::InvalidNumericalBase);
    CHECK(onlyMessage.GetLevel() == Diag::DiagnosticLevel::Error);
    CHECK(onlyMessage.GetSpan() == ElohimScript::TextSpan(0, 2));

    // The test deletes the pointer returned by Lex() itself.
    delete firstToken;
}

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define STRING_TEST(str, constraint) \
TEST_CASE("Lex string " constraint str constraint) { \
auto lexer = Lexer(constraint str constraint); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(constraint str constraint, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); \
auto value = ((const StringLiteral*)token)->GetValue(); \
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(str))); \
@ -21,9 +23,12 @@ STRING_TEST("\"foo bar\"", "\"\"\"");
STRING_TEST("\"\"foo bar\"\"", "\"\"\"");
TEST_CASE("Lex multiline string") {
ElohimScript::Diagnostics::Diagnostics diag;
auto lexer = Lexer(R"("""foo
bar""")");
bar""")",
&diag);
const auto* token = lexer.Lex();
CHECK(diag.GetMessages().empty());
REQUIRE(token->GetKind() == LexTokenKind::StringLiteral);
auto value = (dynamic_cast<const StringLiteral*>(token))->GetValue();
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(R"(foo

View File

@ -6,8 +6,10 @@ using namespace ElohimScript::Parser;
#define SYMBOL_TEST(script, symbol) \
TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \
ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
CHECK(token->GetKind() == LexTokenKind::symbol); \
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \
delete token; \
@ -71,9 +73,11 @@ SYMBOL_TEST(" ", Whitespace)
TEST_CASE("Lex whitespace") {
auto whitespace = {" ", "\t", "\n", "\r", "\xef\xbb\xbf"};
for (auto v : whitespace) {
auto lexer = Lexer(v);
for (const auto *v : whitespace) {
ElohimScript::Diagnostics::Diagnostics diag;
auto lexer = Lexer(v, &diag);
const auto* token = lexer.Lex();
CHECK(diag.GetMessages().empty());
CHECK(token->GetKind() == LexTokenKind::Whitespace);
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile);
delete token;