Support for diagnostics system.

This commit is contained in:
Deukhoofd 2020-10-04 19:38:13 +02:00
parent 20976010d6
commit b6a5e047c2
Signed by: Deukhoofd
GPG Key ID: F63E044490819F6F
13 changed files with 347 additions and 217 deletions

View File

@ -0,0 +1,23 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTIC_HPP
#define ELOHIMSCRIPT_DIAGNOSTIC_HPP
#include "../Parser/TextSpan.hpp"
#include "DiagnosticLevel.hpp"
#include "DiagnosticType.hpp"
namespace ElohimScript::Diagnostics {
class Diagnostic {
DiagnosticLevel _level;
DiagnosticType _type;
TextSpan _span;
public:
inline Diagnostic(DiagnosticLevel level, DiagnosticType type, TextSpan span)
: _level(level), _type(type), _span(span) {}
[[nodiscard]] inline DiagnosticLevel GetLevel() const noexcept { return _level; }
[[nodiscard]] inline DiagnosticType GetType() const noexcept { return _type; }
[[nodiscard]] inline const TextSpan& GetSpan() const noexcept { return _span; }
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTIC_HPP

View File

@ -0,0 +1,15 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP
#define ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP
#include <cstdint>
namespace ElohimScript::Diagnostics {
    /// Level attached to a diagnostic, stored in a single byte.
    /// Enumerators are numbered consecutively from 0 in declaration order
    /// (presumably ascending severity -- confirm against consumers before comparing numerically).
    enum class DiagnosticLevel : uint8_t {
        Trace = 0,
        Information = 1,
        Warning = 2,
        Error = 3,
        Critical = 4,
    };
}
#endif // ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP

View File

@ -0,0 +1,20 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP
#define ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP
#include <cstdint>
#include <string>
namespace ElohimScript::Diagnostics {
    /// Identifies what a diagnostic is about. Stored in a single byte.
    enum class DiagnosticType : uint8_t { UnknownToken, InvalidNumericalBase, ExpectedEndOfString };

    /// Stateless helper for converting DiagnosticType values to text.
    class DiagnosticTypeHelper {
    public:
        // NOTE: this member used to sit in the class's default (private) section, which made it
        // impossible to call from anywhere; it is a public static utility.

        /// Returns the English description of a diagnostic type.
        ///
        /// @param type The diagnostic type to describe.
        /// @return The fixed English message for a known type; for any value without a dedicated
        ///         message, the decimal representation of its underlying numeric value.
        [[nodiscard]] static std::string ToEnglishString(DiagnosticType type) {
            switch (type) {
                case DiagnosticType::UnknownToken: return "Unknown token";
                case DiagnosticType::InvalidNumericalBase: return "Invalid numerical base";
                case DiagnosticType::ExpectedEndOfString: return "Expected end of string";
            }
            // Reached only for values outside the enumerators above (e.g. produced by a cast).
            // Convert through an integer type so the number is printed, not a character.
            return std::to_string(static_cast<unsigned int>(type));
        }
    };
}
#endif // ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP

View File

@ -0,0 +1 @@
#include "Diagnostics.hpp"

View File

@ -0,0 +1,25 @@
#ifndef ELOHIMSCRIPT_DIAGNOSTICS_HPP
#define ELOHIMSCRIPT_DIAGNOSTICS_HPP
#include <vector>
#include "Diagnostic.hpp"
namespace ElohimScript::Diagnostics {
class Diagnostics {
std::vector<Diagnostic> _messages;
public:
inline void Log(DiagnosticLevel level, DiagnosticType type, TextSpan span) {
_messages.emplace_back(level, type, span);
}
inline void LogTrace(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Trace, type, span); }
inline void LogInfo(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Information, type, span); }
inline void LogWarning(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Warning, type, span); }
inline void LogError(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Error, type, span); }
inline void LogCritical(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Critical, type, span); }
[[nodiscard]] const std::vector<Diagnostic>& GetMessages() const noexcept { return _messages; }
};
}
#endif // ELOHIMSCRIPT_DIAGNOSTICS_HPP

View File

@ -10,16 +10,19 @@ namespace ElohimScript::Parser {
friend class Lexer; friend class Lexer;
std::unique_ptr<const LexToken> _next; std::unique_ptr<const LexToken> _next;
TextSpan _span;
public: public:
LexToken(TextSpan span) : _span(span) {}
virtual ~LexToken() = default; virtual ~LexToken() = default;
[[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0; [[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0;
[[nodiscard]] const std::unique_ptr<const LexToken>& GetNext() const noexcept { return _next; } [[nodiscard]] const std::unique_ptr<const LexToken>& GetNext() const noexcept { return _next; }
[[nodiscard]] const TextSpan& GetSpan() const noexcept { return _span; }
}; };
template <LexTokenKind kind> class LexTokenImpl : public LexToken { template <LexTokenKind kind> class LexTokenImpl : public LexToken {
public: public:
LexTokenImpl() = default; LexTokenImpl(TextSpan span) : LexToken(span){};
[[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; } [[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; }
}; };
@ -27,7 +30,8 @@ namespace ElohimScript::Parser {
uint64_t _value; uint64_t _value;
public: public:
IntegerLiteral(uint64_t value) : _value(value) {} IntegerLiteral(TextSpan span, uint64_t value)
: LexTokenImpl<LexTokenKind::IntegerLiteral>(span), _value(value) {}
[[nodiscard]] uint64_t GetValue() const noexcept { return _value; } [[nodiscard]] uint64_t GetValue() const noexcept { return _value; }
}; };
@ -35,7 +39,7 @@ namespace ElohimScript::Parser {
double _value; double _value;
public: public:
FloatLiteral(double value) : _value(value) {} FloatLiteral(TextSpan span, double value) : LexTokenImpl<LexTokenKind::FloatLiteral>(span), _value(value) {}
[[nodiscard]] double GetValue() const noexcept { return _value; } [[nodiscard]] double GetValue() const noexcept { return _value; }
}; };
@ -43,7 +47,8 @@ namespace ElohimScript::Parser {
std::u8string _value; std::u8string _value;
public: public:
StringLiteral(std::u8string value) : _value(std::move(value)) {} StringLiteral(TextSpan span, std::u8string value)
: LexTokenImpl<LexTokenKind::StringLiteral>(span), _value(std::move(value)) {}
[[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; } [[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; }
}; };
@ -51,7 +56,8 @@ namespace ElohimScript::Parser {
std::u8string _value; std::u8string _value;
public: public:
IdentifierToken(std::u8string value) : _value(std::move(value)) {} IdentifierToken(TextSpan span, std::u8string value)
: LexTokenImpl<LexTokenKind::Identifier>(span), _value(std::move(value)) {}
[[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; } [[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; }
}; };
} }

View File

@ -22,9 +22,10 @@ namespace ElohimScript::Parser {
} }
LexToken* Lexer::LexNext() { LexToken* Lexer::LexNext() {
auto start = _position;
auto c = Consume(); auto c = Consume();
switch (c) { switch (c) {
case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>(); case u8'\0': return new LexTokenImpl<LexTokenKind::EndOfFile>(TextSpan(start, 1));
case u8'*': { case u8'*': {
auto n = Peek(); auto n = Peek();
if (n == u8'*') { if (n == u8'*') {
@ -32,177 +33,219 @@ namespace ElohimScript::Parser {
n = Peek(); n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>(); // **=
return new LexTokenImpl<LexTokenKind::StarStarEqualsSymbol>(TextSpan(start, 3));
} }
return new LexTokenImpl<LexTokenKind::StarStarSymbol>(); // **
return new LexTokenImpl<LexTokenKind::StarStarSymbol>(TextSpan(start, 2));
} }
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>(); // *=
return new LexTokenImpl<LexTokenKind::StarEqualsSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::StarSymbol>(); // *
return new LexTokenImpl<LexTokenKind::StarSymbol>(TextSpan(start, 1));
} }
case u8'/': case u8'/':
if (Peek() == u8'=') { if (Peek() == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>(); // /=
return new LexTokenImpl<LexTokenKind::SlashEqualsSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::SlashSymbol>(); // /
return new LexTokenImpl<LexTokenKind::SlashSymbol>(TextSpan(start, 1));
case u8'%': case u8'%':
if (Peek() == u8'=') { if (Peek() == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>(); // %=
return new LexTokenImpl<LexTokenKind::PercentEqualsSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::PercentSymbol>(); // %
return new LexTokenImpl<LexTokenKind::PercentSymbol>(TextSpan(start, 1));
case u8'+': { case u8'+': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>(); // +=
return new LexTokenImpl<LexTokenKind::PlusEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'+') { if (n == u8'+') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>(); // ++
return new LexTokenImpl<LexTokenKind::PlusPlusSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::PlusSymbol>(); // +
return new LexTokenImpl<LexTokenKind::PlusSymbol>(TextSpan(start, 1));
} }
case u8'-': { case u8'-': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>(); // -=
return new LexTokenImpl<LexTokenKind::MinusEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'-') { if (n == u8'-') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>(); // --
return new LexTokenImpl<LexTokenKind::MinusMinusSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::MinusSymbol>(); // -
return new LexTokenImpl<LexTokenKind::MinusSymbol>(TextSpan(start, 1));
} }
case u8'<': { case u8'<': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>(); // <=
return new LexTokenImpl<LexTokenKind::LessThanEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'<') { if (n == u8'<') {
Progress(); Progress();
if (Peek() == u8'=') { if (Peek() == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>(); // <<=
return new LexTokenImpl<LexTokenKind::LessThanLessThanEqualsSymbol>(TextSpan(start, 3));
} }
return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>(); // <<
return new LexTokenImpl<LexTokenKind::LessThanLessThanSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::LessThanSymbol>(); // <
return new LexTokenImpl<LexTokenKind::LessThanSymbol>(TextSpan(start, 1));
} }
case u8'>': { case u8'>': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>(); // >=
return new LexTokenImpl<LexTokenKind::GreaterThanEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'>') { if (n == u8'>') {
Progress(); Progress();
n = Peek(); n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>(); // >>=
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanEqualsSymbol>(TextSpan(start, 3));
} }
if (n == u8'>') { if (n == u8'>') {
Progress(); Progress();
if (Peek() == u8'=') { if (Peek() == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>(); // >>>=
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanEqualsSymbol>(
TextSpan(start, 4));
} }
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>(); // >>>
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanGreaterThanSymbol>(
TextSpan(start, 3));
} }
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>(); // >>
return new LexTokenImpl<LexTokenKind::GreaterThanGreaterThanSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>(); // >
return new LexTokenImpl<LexTokenKind::GreaterThanSymbol>(TextSpan(start, 1));
} }
case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>(); case u8'(': return new LexTokenImpl<LexTokenKind::OpenParenthesisSymbol>(TextSpan(start, 1));
case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>(); case u8')': return new LexTokenImpl<LexTokenKind::CloseParenthesisSymbol>(TextSpan(start, 1));
case u8'=': { case u8'=': {
if (Peek() == u8'=') { if (Peek() == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>(); // ==
return new LexTokenImpl<LexTokenKind::EqualsEqualsSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::EqualsSymbol>(); // =
return new LexTokenImpl<LexTokenKind::EqualsSymbol>(TextSpan(start, 1));
} }
case u8'!': { case u8'!': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>(); // !=
return new LexTokenImpl<LexTokenKind::ExclamationMarkEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'i' && Peek(2) == u8's') { if (n == u8'i' && Peek(2) == u8's') {
Progress(2); Progress(2);
return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>(); // !is
return new LexTokenImpl<LexTokenKind::ExclamationMarkIsSymbol>(TextSpan(start, 3));
} }
return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>(); // !
return new LexTokenImpl<LexTokenKind::ExclamationMarkSymbol>(TextSpan(start, 1));
} }
case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>(); case u8'?': return new LexTokenImpl<LexTokenKind::QuestionMarkSymbol>(TextSpan(start, 1));
case u8':': { case u8':': {
if (Peek() == u8':') { if (Peek() == u8':') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::ColonColonSymbol>(); // ::
return new LexTokenImpl<LexTokenKind::ColonColonSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::ColonSymbol>(); // :
return new LexTokenImpl<LexTokenKind::ColonSymbol>(TextSpan(start, 1));
} }
case u8'&': { case u8'&': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>(); // &=
return new LexTokenImpl<LexTokenKind::AmpersandEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'&') { if (n == u8'&') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>(); // &&
return new LexTokenImpl<LexTokenKind::AmpersandAmpersandSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::AmpersandSymbol>(); // &
return new LexTokenImpl<LexTokenKind::AmpersandSymbol>(TextSpan(start, 1));
} }
case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>(); case u8',': return new LexTokenImpl<LexTokenKind::CommaSymbol>(TextSpan(start, 1));
case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>(); case u8'{': return new LexTokenImpl<LexTokenKind::OpenCurlyParenthesisSymbol>(TextSpan(start, 1));
case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>(); case u8'}': return new LexTokenImpl<LexTokenKind::CloseCurlyParenthesisSymbol>(TextSpan(start, 1));
case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>(); case u8';': return new LexTokenImpl<LexTokenKind::SemicolonSymbol>(TextSpan(start, 1));
case u8'|': { case u8'|': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>(); // |=
return new LexTokenImpl<LexTokenKind::VerticalLineEqualsSymbol>(TextSpan(start, 2));
} }
if (n == u8'|') { if (n == u8'|') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>(); // ||
return new LexTokenImpl<LexTokenKind::VerticalLineVerticalLineSymbol>(TextSpan(start, 2));
} }
return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>(); // |
return new LexTokenImpl<LexTokenKind::VerticalLineSymbol>(TextSpan(start, 1));
} }
case u8'^': { case u8'^': {
auto n = Peek(); auto n = Peek();
if (n == u8'=') { if (n == u8'=') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>(); // ^=
return new LexTokenImpl<LexTokenKind::CaretEqualsSymbol>(TextSpan(start, start + 2));
} }
if (n == u8'^') { if (n == u8'^') {
Progress(); Progress();
return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>(); // ^^
return new LexTokenImpl<LexTokenKind::CaretCaretSymbol>(TextSpan(start, start + 2));
} }
return new LexTokenImpl<LexTokenKind::CaretSymbol>(); // ^
return new LexTokenImpl<LexTokenKind::CaretSymbol>(TextSpan(start, start + 1));
} }
case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>(); case u8'~': return new LexTokenImpl<LexTokenKind::TildeSymbol>(TextSpan(start, start + 1));
case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>(); case u8'.': return new LexTokenImpl<LexTokenKind::DotSymbol>(TextSpan(start, start + 1));
case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>(); case u8'[': return new LexTokenImpl<LexTokenKind::OpenBlockParenthesisSymbol>(TextSpan(start, start + 1));
case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>(); case u8']': return new LexTokenImpl<LexTokenKind::CloseBlockParenthesisSymbol>(TextSpan(start, start + 1));
case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>(); case u8'@': return new LexTokenImpl<LexTokenKind::AtSymbol>(TextSpan(start, start + 1));
case u8' ': case u8' ':
case u8'\r': case u8'\r':
case u8'\n': case u8'\n':
case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>(); case u8'\t': return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 1));
// Byte order mark // Byte order mark
case u8'\xEF': { case u8'\xEF': {
if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') { if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') {
Progress(2); Progress(2);
return new LexTokenImpl<LexTokenKind::Whitespace>(); return new LexTokenImpl<LexTokenKind::Whitespace>(TextSpan(start, start + 3));
} }
} }
case u8'0': case u8'0':
@ -226,8 +269,8 @@ namespace ElohimScript::Parser {
default: default:
if (IsAlphaNumericalOrUnderscore(c)) if (IsAlphaNumericalOrUnderscore(c))
return LexKeywordOrIdentifier(); return LexKeywordOrIdentifier();
// TODO: Log error _diagnostics->LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1));
return new LexTokenImpl<LexTokenKind::Unknown>(); return new LexTokenImpl<LexTokenKind::Unknown>(TextSpan(start, start + 1));
} }
} }
@ -248,7 +291,10 @@ namespace ElohimScript::Parser {
; ;
case 'b': numericalSystem = 2; break; case 'b': numericalSystem = 2; break;
default: default:
// TODO: Log Invalid numerical system _diagnostics->LogError(Diagnostics::DiagnosticType::InvalidNumericalBase,
TextSpan(_position - 1, _position + 1));
// Set to the largest numerical system, so we can prevent errors down the line.
numericalSystem = 16;
break; break;
} }
} }
@ -287,6 +333,7 @@ namespace ElohimScript::Parser {
} }
LexToken* Lexer::LexDecimal(uint64_t initial) { LexToken* Lexer::LexDecimal(uint64_t initial) {
auto start = _position;
uint64_t value = initial; uint64_t value = initial;
uint64_t decimalValue = 0; uint64_t decimalValue = 0;
uint64_t exponentValue = 0; uint64_t exponentValue = 0;
@ -327,12 +374,13 @@ namespace ElohimScript::Parser {
if (isExponent) { if (isExponent) {
val *= pow(10, exponentValue); val *= pow(10, exponentValue);
} }
return new FloatLiteral(val); return new FloatLiteral(TextSpan(start, _position), val);
} }
return new IntegerLiteral(value); return new IntegerLiteral(TextSpan(start, _position), value);
} }
IntegerLiteral* Lexer::LexHexadecimal() { IntegerLiteral* Lexer::LexHexadecimal() {
auto start = _position;
uint64_t value = 0; uint64_t value = 0;
while (true) { while (true) {
auto v = LexHexadecimalValue(Peek()); auto v = LexHexadecimalValue(Peek());
@ -343,9 +391,10 @@ namespace ElohimScript::Parser {
value <<= 4; value <<= 4;
value += v; value += v;
} }
return new IntegerLiteral(value); return new IntegerLiteral(TextSpan(start, _position), value);
} }
IntegerLiteral* Lexer::LexOctal() { IntegerLiteral* Lexer::LexOctal() {
auto start = _position;
uint64_t value = 0; uint64_t value = 0;
while (true) { while (true) {
auto v = LexOctalValue(Peek()); auto v = LexOctalValue(Peek());
@ -356,9 +405,10 @@ namespace ElohimScript::Parser {
value <<= 3; value <<= 3;
value += v; value += v;
} }
return new IntegerLiteral(value); return new IntegerLiteral(TextSpan(start, _position), value);
} }
IntegerLiteral* Lexer::LexBinary() { IntegerLiteral* Lexer::LexBinary() {
auto start = _position;
uint64_t value = 0; uint64_t value = 0;
while (true) { while (true) {
auto v = LexBinaryValue(Peek()); auto v = LexBinaryValue(Peek());
@ -369,7 +419,7 @@ namespace ElohimScript::Parser {
value <<= 1; value <<= 1;
value += v; value += v;
} }
return new IntegerLiteral(value); return new IntegerLiteral(TextSpan(start, _position), value);
} }
StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) { StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) {
Progress(); Progress();
@ -388,11 +438,13 @@ namespace ElohimScript::Parser {
break; break;
} }
if (current == u8'\0') { if (current == u8'\0') {
// TODO: Log error _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
TextSpan(start, start + offset));
break; break;
} }
if (!heredoc && (current == u8'\n' || current == u8'\r')) { if (!heredoc && (current == u8'\n' || current == u8'\r')) {
// TODO: log error _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString,
TextSpan(start, start + offset));
break; break;
} }
offset++; offset++;
@ -401,14 +453,15 @@ namespace ElohimScript::Parser {
if (heredoc) { if (heredoc) {
Progress(2); Progress(2);
} }
return new StringLiteral(std::u8string(_script.substr(start, offset))); return new StringLiteral(TextSpan(start, start + _position), std::u8string(_script.substr(start, offset)));
} }
static uint32_t constexpr Hash(const char8_t* input) { static uint32_t constexpr Hash(const char8_t* input) {
return *input ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381; return *input != 0U ? static_cast<uint32_t>(*input) + 33 * Hash(input + 1) : 5381;
}; };
LexToken* Lexer::LexKeywordOrIdentifier() { LexToken* Lexer::LexKeywordOrIdentifier() {
auto start = _position;
auto offset = 0; auto offset = 0;
while (IsAlphaNumericalOrUnderscore(Peek(offset))) { while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
offset++; offset++;
@ -416,137 +469,75 @@ namespace ElohimScript::Parser {
auto str = _script.substr(_position, offset); auto str = _script.substr(_position, offset);
Progress(offset); Progress(offset);
switch (Hash(str.data())) { switch (Hash(str.data())) {
case Hash(u8"and"): case Hash(u8"and"): return new LexTokenImpl<LexTokenKind::AndKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::AndKeyword>(); case Hash(u8"abstract"): return new LexTokenImpl<LexTokenKind::AbstractKeyword>(TextSpan(start, _position));
case Hash(u8"abstract"): case Hash(u8"auto"): return new LexTokenImpl<LexTokenKind::AutoKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::AbstractKeyword>(); case Hash(u8"bool"): return new LexTokenImpl<LexTokenKind::BoolKeyword>(TextSpan(start, _position));
case Hash(u8"auto"): case Hash(u8"break"): return new LexTokenImpl<LexTokenKind::BreakKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::AutoKeyword>(); case Hash(u8"case"): return new LexTokenImpl<LexTokenKind::CaseKeyword>(TextSpan(start, _position));
case Hash(u8"bool"): case Hash(u8"cast"): return new LexTokenImpl<LexTokenKind::CastKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::BoolKeyword>(); case Hash(u8"catch"): return new LexTokenImpl<LexTokenKind::CatchKeyword>(TextSpan(start, _position));
case Hash(u8"break"): case Hash(u8"class"): return new LexTokenImpl<LexTokenKind::ClassKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::BreakKeyword>(); case Hash(u8"const"): return new LexTokenImpl<LexTokenKind::ConstKeyword>(TextSpan(start, _position));
case Hash(u8"case"): case Hash(u8"continue"): return new LexTokenImpl<LexTokenKind::ContinueKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::CaseKeyword>(); case Hash(u8"default"): return new LexTokenImpl<LexTokenKind::DefaultKeyword>(TextSpan(start, _position));
case Hash(u8"cast"): case Hash(u8"do"): return new LexTokenImpl<LexTokenKind::DoKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::CastKeyword>(); case Hash(u8"double"): return new LexTokenImpl<LexTokenKind::DoubleKeyword>(TextSpan(start, _position));
case Hash(u8"catch"): case Hash(u8"else"): return new LexTokenImpl<LexTokenKind::ElseKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::CatchKeyword>(); case Hash(u8"enum"): return new LexTokenImpl<LexTokenKind::EnumKeyword>(TextSpan(start, _position));
case Hash(u8"class"): case Hash(u8"explicit"): return new LexTokenImpl<LexTokenKind::ExplicitKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ClassKeyword>(); case Hash(u8"external"): return new LexTokenImpl<LexTokenKind::ExternalKeyword>(TextSpan(start, _position));
case Hash(u8"const"): case Hash(u8"false"): return new LexTokenImpl<LexTokenKind::FalseKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ConstKeyword>(); case Hash(u8"final"): return new LexTokenImpl<LexTokenKind::FinalKeyword>(TextSpan(start, _position));
case Hash(u8"continue"): case Hash(u8"float"): return new LexTokenImpl<LexTokenKind::FloatKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ContinueKeyword>(); case Hash(u8"for"): return new LexTokenImpl<LexTokenKind::ForKeyword>(TextSpan(start, _position));
case Hash(u8"default"): case Hash(u8"from"): return new LexTokenImpl<LexTokenKind::FromKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::DefaultKeyword>(); case Hash(u8"funcdef"): return new LexTokenImpl<LexTokenKind::FuncdefKeyword>(TextSpan(start, _position));
case Hash(u8"do"): case Hash(u8"function"): return new LexTokenImpl<LexTokenKind::FunctionKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::DoKeyword>(); case Hash(u8"get"): return new LexTokenImpl<LexTokenKind::GetKeyword>(TextSpan(start, _position));
case Hash(u8"double"): case Hash(u8"if"): return new LexTokenImpl<LexTokenKind::IfKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::DoubleKeyword>(); case Hash(u8"import"): return new LexTokenImpl<LexTokenKind::ImportKeyword>(TextSpan(start, _position));
case Hash(u8"else"): case Hash(u8"in"): return new LexTokenImpl<LexTokenKind::InKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ElseKeyword>(); case Hash(u8"inout"): return new LexTokenImpl<LexTokenKind::InoutKeyword>(TextSpan(start, _position));
case Hash(u8"enum"): case Hash(u8"int"): return new LexTokenImpl<LexTokenKind::IntKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::EnumKeyword>();
case Hash(u8"explicit"):
return new LexTokenImpl<LexTokenKind::ExplicitKeyword>();
case Hash(u8"external"):
return new LexTokenImpl<LexTokenKind::ExternalKeyword>();
case Hash(u8"false"):
return new LexTokenImpl<LexTokenKind::FalseKeyword>();
case Hash(u8"final"):
return new LexTokenImpl<LexTokenKind::FinalKeyword>();
case Hash(u8"float"):
return new LexTokenImpl<LexTokenKind::FloatKeyword>();
case Hash(u8"for"):
return new LexTokenImpl<LexTokenKind::ForKeyword>();
case Hash(u8"from"):
return new LexTokenImpl<LexTokenKind::FromKeyword>();
case Hash(u8"funcdef"):
return new LexTokenImpl<LexTokenKind::FuncdefKeyword>();
case Hash(u8"function"):
return new LexTokenImpl<LexTokenKind::FunctionKeyword>();
case Hash(u8"get"):
return new LexTokenImpl<LexTokenKind::GetKeyword>();
case Hash(u8"if"):
return new LexTokenImpl<LexTokenKind::IfKeyword>();
case Hash(u8"import"):
return new LexTokenImpl<LexTokenKind::ImportKeyword>();
case Hash(u8"in"):
return new LexTokenImpl<LexTokenKind::InKeyword>();
case Hash(u8"inout"):
return new LexTokenImpl<LexTokenKind::InoutKeyword>();
case Hash(u8"int"):
return new LexTokenImpl<LexTokenKind::IntKeyword>();
case Hash(u8"interface"): case Hash(u8"interface"):
return new LexTokenImpl<LexTokenKind::InterfaceKeyword>(); return new LexTokenImpl<LexTokenKind::InterfaceKeyword>(TextSpan(start, _position));
case Hash(u8"int8"): case Hash(u8"int8"): return new LexTokenImpl<LexTokenKind::Int8Keyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::Int8Keyword>(); case Hash(u8"int16"): return new LexTokenImpl<LexTokenKind::Int16Keyword>(TextSpan(start, _position));
case Hash(u8"int16"): case Hash(u8"int32"): return new LexTokenImpl<LexTokenKind::Int32Keyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::Int16Keyword>(); case Hash(u8"int64"): return new LexTokenImpl<LexTokenKind::Int64Keyword>(TextSpan(start, _position));
case Hash(u8"int32"): case Hash(u8"is"): return new LexTokenImpl<LexTokenKind::IsKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::Int32Keyword>(); case Hash(u8"mixin"): return new LexTokenImpl<LexTokenKind::MixinKeyword>(TextSpan(start, _position));
case Hash(u8"int64"):
return new LexTokenImpl<LexTokenKind::Int64Keyword>();
case Hash(u8"is"):
return new LexTokenImpl<LexTokenKind::IsKeyword>();
case Hash(u8"mixin"):
return new LexTokenImpl<LexTokenKind::MixinKeyword>();
case Hash(u8"namespace"): case Hash(u8"namespace"):
return new LexTokenImpl<LexTokenKind::NamespaceKeyword>(); return new LexTokenImpl<LexTokenKind::NamespaceKeyword>(TextSpan(start, _position));
case Hash(u8"not"): case Hash(u8"not"): return new LexTokenImpl<LexTokenKind::NotKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::NotKeyword>(); case Hash(u8"null"): return new LexTokenImpl<LexTokenKind::NullKeyword>(TextSpan(start, _position));
case Hash(u8"null"): case Hash(u8"or"): return new LexTokenImpl<LexTokenKind::OrKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::NullKeyword>(); case Hash(u8"out"): return new LexTokenImpl<LexTokenKind::OutKeyword>(TextSpan(start, _position));
case Hash(u8"or"): case Hash(u8"override"): return new LexTokenImpl<LexTokenKind::OverrideKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::OrKeyword>(); case Hash(u8"private"): return new LexTokenImpl<LexTokenKind::PrivateKeyword>(TextSpan(start, _position));
case Hash(u8"out"): case Hash(u8"property"): return new LexTokenImpl<LexTokenKind::PropertyKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::OutKeyword>();
case Hash(u8"override"):
return new LexTokenImpl<LexTokenKind::OverrideKeyword>();
case Hash(u8"private"):
return new LexTokenImpl<LexTokenKind::PrivateKeyword>();
case Hash(u8"property"):
return new LexTokenImpl<LexTokenKind::PropertyKeyword>();
case Hash(u8"protected"): case Hash(u8"protected"):
return new LexTokenImpl<LexTokenKind::ProtectedKeyword>(); return new LexTokenImpl<LexTokenKind::ProtectedKeyword>(TextSpan(start, _position));
case Hash(u8"return"): case Hash(u8"return"): return new LexTokenImpl<LexTokenKind::ReturnKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ReturnKeyword>(); case Hash(u8"set"): return new LexTokenImpl<LexTokenKind::SetKeyword>(TextSpan(start, _position));
case Hash(u8"set"): case Hash(u8"shared"): return new LexTokenImpl<LexTokenKind::SharedKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::SetKeyword>(); case Hash(u8"super"): return new LexTokenImpl<LexTokenKind::SuperKeyword>(TextSpan(start, _position));
case Hash(u8"shared"): case Hash(u8"switch"): return new LexTokenImpl<LexTokenKind::SwitchKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::SharedKeyword>(); case Hash(u8"this"): return new LexTokenImpl<LexTokenKind::ThisKeyword>(TextSpan(start, _position));
case Hash(u8"super"): case Hash(u8"true"): return new LexTokenImpl<LexTokenKind::TrueKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::SuperKeyword>(); case Hash(u8"try"): return new LexTokenImpl<LexTokenKind::TryKeyword>(TextSpan(start, _position));
case Hash(u8"switch"): case Hash(u8"typedef"): return new LexTokenImpl<LexTokenKind::TypedefKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::SwitchKeyword>(); case Hash(u8"uint"): return new LexTokenImpl<LexTokenKind::UintKeyword>(TextSpan(start, _position));
case Hash(u8"this"): case Hash(u8"uint8"): return new LexTokenImpl<LexTokenKind::Uint8Keyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::ThisKeyword>(); case Hash(u8"uint16"): return new LexTokenImpl<LexTokenKind::Uint16Keyword>(TextSpan(start, _position));
case Hash(u8"true"): case Hash(u8"uint32"): return new LexTokenImpl<LexTokenKind::Uint32Keyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::TrueKeyword>(); case Hash(u8"uint64"): return new LexTokenImpl<LexTokenKind::Uint64Keyword>(TextSpan(start, _position));
case Hash(u8"try"): case Hash(u8"void"): return new LexTokenImpl<LexTokenKind::VoidKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::TryKeyword>(); case Hash(u8"while"): return new LexTokenImpl<LexTokenKind::WhileKeyword>(TextSpan(start, _position));
case Hash(u8"typedef"): case Hash(u8"xor"): return new LexTokenImpl<LexTokenKind::XorKeyword>(TextSpan(start, _position));
return new LexTokenImpl<LexTokenKind::TypedefKeyword>();
case Hash(u8"uint"):
return new LexTokenImpl<LexTokenKind::UintKeyword>();
case Hash(u8"uint8"):
return new LexTokenImpl<LexTokenKind::Uint8Keyword>();
case Hash(u8"uint16"):
return new LexTokenImpl<LexTokenKind::Uint16Keyword>();
case Hash(u8"uint32"):
return new LexTokenImpl<LexTokenKind::Uint32Keyword>();
case Hash(u8"uint64"):
return new LexTokenImpl<LexTokenKind::Uint64Keyword>();
case Hash(u8"void"):
return new LexTokenImpl<LexTokenKind::VoidKeyword>();
case Hash(u8"while"):
return new LexTokenImpl<LexTokenKind::WhileKeyword>();
case Hash(u8"xor"):
return new LexTokenImpl<LexTokenKind::XorKeyword>();
default: default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str));
return new IdentifierToken(std::u8string(str));
} }
} }
bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) { bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {

View File

@ -2,19 +2,22 @@
#define ELOHIMSCRIPT_LEXER_HPP #define ELOHIMSCRIPT_LEXER_HPP
#include <string_view> #include <string_view>
#include "../../Diagnostics/Diagnostics.hpp"
#include "LexToken.hpp" #include "LexToken.hpp"
namespace ElohimScript::Parser { namespace ElohimScript::Parser {
class Lexer { class Lexer {
public: public:
Lexer(const char* script) : _script(reinterpret_cast<const char8_t*>(script)) {} Lexer(const char* script, Diagnostics::Diagnostics* diag)
Lexer(const char8_t* script) : _script(script) {} : Lexer(reinterpret_cast<const char8_t*>(script), diag) {}
Lexer(std::u8string_view script) : _script(script) {} Lexer(const char8_t* script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {}
Lexer(std::u8string_view script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {}
const LexToken* Lex(); const LexToken* Lex();
private: private:
std::u8string_view _script; std::u8string_view _script;
size_t _position = -1; size_t _position = -1;
Diagnostics::Diagnostics* _diagnostics;
inline char8_t Consume() { inline char8_t Consume() {
if (++_position >= _script.size()) { if (++_position >= _script.size()) {
@ -23,9 +26,7 @@ namespace ElohimScript::Parser {
return _script[_position]; return _script[_position];
} }
inline void Progress(size_t steps = 1){ inline void Progress(size_t steps = 1) { _position += steps; }
_position += steps;
}
inline char8_t Peek(size_t offset = 1) { inline char8_t Peek(size_t offset = 1) {
auto pos = _position + offset; auto pos = _position + offset;

19
src/Parser/TextSpan.hpp Normal file
View File

@ -0,0 +1,19 @@
#ifndef ELOHIMSCRIPT_TEXTSPAN_HPP
#define ELOHIMSCRIPT_TEXTSPAN_HPP
#include <cstddef>
namespace ElohimScript {
/// A pair of positions (start, end) stored as size_t values.
///
/// The constructor stores both values as given; no ordering or range check is
/// performed. Two spans compare equal when both their start and their end
/// values are equal.
class TextSpan {
    size_t _start;
    size_t _end;

public:
    /// Creates a span from the given start and end values.
    inline TextSpan(size_t start, size_t end) : _start(start), _end(end) {}

    /// Returns the start value passed at construction.
    [[nodiscard]] inline size_t GetStart() const noexcept { return _start; }

    /// Returns the end value passed at construction.
    [[nodiscard]] inline size_t GetEnd() const noexcept { return _end; }

    /// True when both the start and the end of the two spans match.
    inline bool operator==(const TextSpan& rhs) const {
        if (_start != rhs._start) {
            return false;
        }
        return _end == rhs._end;
    }

    /// True when the spans differ in start, in end, or in both.
    inline bool operator!=(const TextSpan& rhs) const { return _start != rhs._start || _end != rhs._end; }
};
}
#endif // ELOHIMSCRIPT_TEXTSPAN_HPP

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define KEYWORD_TEST(script, symbol) \ #define KEYWORD_TEST(script, symbol) \
TEST_CASE("Lex " script) { \ TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
CHECK(token->GetKind() == LexTokenKind::symbol); \ CHECK(token->GetKind() == LexTokenKind::symbol); \
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \ CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \
delete token; \ delete token; \
@ -91,8 +93,10 @@ namespace doctest {
#define IDENTIFIER_TEST(identifier) \ #define IDENTIFIER_TEST(identifier) \
TEST_CASE("Lex identifier " identifier) { \ TEST_CASE("Lex identifier " identifier) { \
auto lexer = Lexer(identifier); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(identifier, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::Identifier); \ REQUIRE(token->GetKind() == LexTokenKind::Identifier); \
auto value = ((IdentifierToken*)token)->GetValue(); \ auto value = ((IdentifierToken*)token)->GetValue(); \
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(identifier))); \ CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(identifier))); \

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define INTEGER_TEST(script, expected) \ #define INTEGER_TEST(script, expected) \
TEST_CASE("Lex " script) { \ TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::IntegerLiteral); \ REQUIRE(token->GetKind() == LexTokenKind::IntegerLiteral); \
auto value = ((const IntegerLiteral*)token)->GetValue(); \ auto value = ((const IntegerLiteral*)token)->GetValue(); \
CHECK(value == (expected)); \ CHECK(value == (expected)); \
@ -16,8 +18,10 @@ using namespace ElohimScript::Parser;
#define FLOAT_TEST(script, expected) \ #define FLOAT_TEST(script, expected) \
TEST_CASE("Lex " script) { \ TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::FloatLiteral); \ REQUIRE(token->GetKind() == LexTokenKind::FloatLiteral); \
auto value = ((const FloatLiteral*)token)->GetValue(); \ auto value = ((const FloatLiteral*)token)->GetValue(); \
CHECK(value == (expected)); \ CHECK(value == (expected)); \
@ -64,3 +68,15 @@ INTEGER_TEST("0b110011", 51);
#undef INTEGER_TEST #undef INTEGER_TEST
#undef FLOAT_TEST #undef FLOAT_TEST
// Lexing "0f553" must report exactly one diagnostic: an Error-level
// InvalidNumericalBase message whose span equals TextSpan(0, 2).
TEST_CASE("Lex invalid numerical base") {
    namespace Diag = ElohimScript::Diagnostics;

    Diag::Diagnostics diagnostics;
    Lexer lexer("0f553", &diagnostics);
    const auto* firstToken = lexer.Lex();

    // REQUIRE guards the index access below: stop here unless there is exactly one message.
    const auto& reported = diagnostics.GetMessages();
    REQUIRE(reported.size() == 1);

    const auto& message = reported[0];
    CHECK(message.GetType() == Diag::DiagnosticType::InvalidNumericalBase);
    CHECK(message.GetLevel() == Diag::DiagnosticLevel::Error);
    CHECK(message.GetSpan() == ElohimScript::TextSpan(0, 2));

    delete firstToken;
}

View File

@ -5,8 +5,10 @@ using namespace ElohimScript::Parser;
#define STRING_TEST(str, constraint) \ #define STRING_TEST(str, constraint) \
TEST_CASE("Lex string " constraint str constraint) { \ TEST_CASE("Lex string " constraint str constraint) { \
auto lexer = Lexer(constraint str constraint); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(constraint str constraint, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); \ REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); \
auto value = ((const StringLiteral*)token)->GetValue(); \ auto value = ((const StringLiteral*)token)->GetValue(); \
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(str))); \ CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(str))); \
@ -21,9 +23,12 @@ STRING_TEST("\"foo bar\"", "\"\"\"");
STRING_TEST("\"\"foo bar\"\"", "\"\"\""); STRING_TEST("\"\"foo bar\"\"", "\"\"\"");
TEST_CASE("Lex multiline string") { TEST_CASE("Lex multiline string") {
ElohimScript::Diagnostics::Diagnostics diag;
auto lexer = Lexer(R"("""foo auto lexer = Lexer(R"("""foo
bar""")"); bar""")",
&diag);
const auto* token = lexer.Lex(); const auto* token = lexer.Lex();
CHECK(diag.GetMessages().empty());
REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); REQUIRE(token->GetKind() == LexTokenKind::StringLiteral);
auto value = (dynamic_cast<const StringLiteral*>(token))->GetValue(); auto value = (dynamic_cast<const StringLiteral*>(token))->GetValue();
CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(R"(foo CHECK(value == std::u8string(reinterpret_cast<const char8_t*>(R"(foo

View File

@ -6,8 +6,10 @@ using namespace ElohimScript::Parser;
#define SYMBOL_TEST(script, symbol) \ #define SYMBOL_TEST(script, symbol) \
TEST_CASE("Lex " script) { \ TEST_CASE("Lex " script) { \
auto lexer = Lexer(script); \ ElohimScript::Diagnostics::Diagnostics diag; \
auto lexer = Lexer(script, &diag); \
const auto* token = lexer.Lex(); \ const auto* token = lexer.Lex(); \
CHECK(diag.GetMessages().empty()); \
CHECK(token->GetKind() == LexTokenKind::symbol); \ CHECK(token->GetKind() == LexTokenKind::symbol); \
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \ CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \
delete token; \ delete token; \
@ -71,9 +73,11 @@ SYMBOL_TEST(" ", Whitespace)
TEST_CASE("Lex whitespace") { TEST_CASE("Lex whitespace") {
auto whitespace = {" ", "\t", "\n", "\r", "\xef\xbb\xbf"}; auto whitespace = {" ", "\t", "\n", "\r", "\xef\xbb\xbf"};
for (auto v : whitespace) { for (const auto *v : whitespace) {
auto lexer = Lexer(v); ElohimScript::Diagnostics::Diagnostics diag;
auto lexer = Lexer(v, &diag);
const auto* token = lexer.Lex(); const auto* token = lexer.Lex();
CHECK(diag.GetMessages().empty());
CHECK(token->GetKind() == LexTokenKind::Whitespace); CHECK(token->GetKind() == LexTokenKind::Whitespace);
CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile);
delete token; delete token;