From b6a5e047c231617d48a8c965b3dd8498d2f49cda Mon Sep 17 00:00:00 2001 From: Deukhoofd Date: Sun, 4 Oct 2020 19:38:13 +0200 Subject: [PATCH] Support for diagnostics system. --- src/Diagnostics/Diagnostic.hpp | 23 ++ src/Diagnostics/DiagnosticLevel.hpp | 15 + src/Diagnostics/DiagnosticType.hpp | 20 ++ src/Diagnostics/Diagnostics.cpp | 1 + src/Diagnostics/Diagnostics.hpp | 25 ++ src/Parser/Lexer/LexToken.hpp | 20 +- src/Parser/Lexer/Lexer.cpp | 379 ++++++++++++------------ src/Parser/Lexer/Lexer.hpp | 13 +- src/Parser/TextSpan.hpp | 19 ++ tests/LexerTests/IdentifierLexTests.cpp | 8 +- tests/LexerTests/NumericalLexTests.cpp | 22 +- tests/LexerTests/StringLexTests.cpp | 9 +- tests/LexerTests/SymbolLexTests.cpp | 10 +- 13 files changed, 347 insertions(+), 217 deletions(-) create mode 100644 src/Diagnostics/Diagnostic.hpp create mode 100644 src/Diagnostics/DiagnosticLevel.hpp create mode 100644 src/Diagnostics/DiagnosticType.hpp create mode 100644 src/Diagnostics/Diagnostics.cpp create mode 100644 src/Diagnostics/Diagnostics.hpp create mode 100644 src/Parser/TextSpan.hpp diff --git a/src/Diagnostics/Diagnostic.hpp b/src/Diagnostics/Diagnostic.hpp new file mode 100644 index 0000000..81600bb --- /dev/null +++ b/src/Diagnostics/Diagnostic.hpp @@ -0,0 +1,23 @@ +#ifndef ELOHIMSCRIPT_DIAGNOSTIC_HPP +#define ELOHIMSCRIPT_DIAGNOSTIC_HPP + +#include "../Parser/TextSpan.hpp" +#include "DiagnosticLevel.hpp" +#include "DiagnosticType.hpp" +namespace ElohimScript::Diagnostics { + class Diagnostic { + DiagnosticLevel _level; + DiagnosticType _type; + TextSpan _span; + + public: + inline Diagnostic(DiagnosticLevel level, DiagnosticType type, TextSpan span) + : _level(level), _type(type), _span(span) {} + + [[nodiscard]] inline DiagnosticLevel GetLevel() const noexcept { return _level; } + [[nodiscard]] inline DiagnosticType GetType() const noexcept { return _type; } + [[nodiscard]] inline const TextSpan& GetSpan() const noexcept { return _span; } + }; +} + +#endif // ELOHIMSCRIPT_DIAGNOSTIC_HPP diff --git a/src/Diagnostics/DiagnosticLevel.hpp b/src/Diagnostics/DiagnosticLevel.hpp new file mode 100644 index 0000000..382ce9c --- /dev/null +++ b/src/Diagnostics/DiagnosticLevel.hpp @@ -0,0 +1,15 @@ +#ifndef ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP +#define ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP + +#include +namespace ElohimScript::Diagnostics { + enum class DiagnosticLevel : uint8_t { + Trace, + Information, + Warning, + Error, + Critical, + }; +} + +#endif // ELOHIMSCRIPT_DIAGNOSTICLEVEL_HPP diff --git a/src/Diagnostics/DiagnosticType.hpp b/src/Diagnostics/DiagnosticType.hpp new file mode 100644 index 0000000..f619ce1 --- /dev/null +++ b/src/Diagnostics/DiagnosticType.hpp @@ -0,0 +1,20 @@ +#ifndef ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP +#define ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP +#include + +namespace ElohimScript::Diagnostics { + enum class DiagnosticType : uint8_t { UnknownToken, InvalidNumericalBase, ExpectedEndOfString }; + + class DiagnosticTypeHelper { + static std::string ToEnglishString(DiagnosticType type) { + switch (type) { + case DiagnosticType::UnknownToken: return "Unknown token"; + case DiagnosticType::InvalidNumericalBase: return "Invalid numerical base"; + case DiagnosticType::ExpectedEndOfString: return "Expected end of string"; + } + return std::to_string((uint8_t)type); + } + }; +} + +#endif // ELOHIMSCRIPT_DIAGNOSTICTYPE_HPP diff --git a/src/Diagnostics/Diagnostics.cpp b/src/Diagnostics/Diagnostics.cpp new file mode 100644 index 0000000..f7c030d --- /dev/null +++ b/src/Diagnostics/Diagnostics.cpp @@ -0,0 +1 @@ +#include "Diagnostics.hpp" diff --git a/src/Diagnostics/Diagnostics.hpp b/src/Diagnostics/Diagnostics.hpp new file mode 100644 index 0000000..013716b --- /dev/null +++ b/src/Diagnostics/Diagnostics.hpp @@ -0,0 +1,25 @@ +#ifndef ELOHIMSCRIPT_DIAGNOSTICS_HPP +#define ELOHIMSCRIPT_DIAGNOSTICS_HPP + +#include +#include "Diagnostic.hpp" + +namespace ElohimScript::Diagnostics { + class Diagnostics { + std::vector _messages; + + public: + inline void Log(DiagnosticLevel level, DiagnosticType type, TextSpan span) { + _messages.emplace_back(level, type, span); + } + inline void LogTrace(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Trace, type, span); } + inline void LogInfo(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Information, type, span); } + inline void LogWarning(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Warning, type, span); } + inline void LogError(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Error, type, span); } + inline void LogCritical(DiagnosticType type, TextSpan span) { Log(DiagnosticLevel::Critical, type, span); } + + [[nodiscard]] const std::vector& GetMessages() const noexcept { return _messages; } + }; +} + +#endif // ELOHIMSCRIPT_DIAGNOSTICS_HPP diff --git a/src/Parser/Lexer/LexToken.hpp b/src/Parser/Lexer/LexToken.hpp index d98c18d..566ae80 100644 --- a/src/Parser/Lexer/LexToken.hpp +++ b/src/Parser/Lexer/LexToken.hpp @@ -10,16 +10,19 @@ namespace ElohimScript::Parser { friend class Lexer; std::unique_ptr _next; + TextSpan _span; public: + LexToken(TextSpan span) : _span(span) {} virtual ~LexToken() = default; [[nodiscard]] virtual LexTokenKind GetKind() const noexcept = 0; [[nodiscard]] const std::unique_ptr& GetNext() const noexcept { return _next; } + [[nodiscard]] const TextSpan& GetSpan() const noexcept { return _span; } }; template class LexTokenImpl : public LexToken { public: - LexTokenImpl() = default; + LexTokenImpl(TextSpan span) : LexToken(span){}; [[nodiscard]] LexTokenKind GetKind() const noexcept override { return kind; } }; @@ -27,7 +30,8 @@ namespace ElohimScript::Parser { uint64_t _value; public: - IntegerLiteral(uint64_t value) : _value(value) {} + IntegerLiteral(TextSpan span, uint64_t value) + : LexTokenImpl(span), _value(value) {} [[nodiscard]] uint64_t GetValue() const noexcept { return _value; } }; @@ -35,7 +39,7 @@ namespace ElohimScript::Parser { double _value; public: - FloatLiteral(double value) : _value(value) {} + FloatLiteral(TextSpan span, double value) : LexTokenImpl(span), _value(value) {} [[nodiscard]] double GetValue() const noexcept { return _value; } }; @@ -43,15 +47,17 @@ namespace ElohimScript::Parser { std::u8string _value; public: - StringLiteral(std::u8string value) : _value(std::move(value)) {} + StringLiteral(TextSpan span, std::u8string value) + : LexTokenImpl(span), _value(std::move(value)) {} [[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; } }; - class IdentifierToken : public LexTokenImpl{ - std::u8string _value; + class IdentifierToken : public LexTokenImpl { + std::u8string _value; public: - IdentifierToken(std::u8string value) : _value(std::move(value)) {} + IdentifierToken(TextSpan span, std::u8string value) + : LexTokenImpl(span), _value(std::move(value)) {} [[nodiscard]] const std::u8string& GetValue() const noexcept { return _value; } }; } diff --git a/src/Parser/Lexer/Lexer.cpp b/src/Parser/Lexer/Lexer.cpp index cf9d891..29b890d 100644 --- a/src/Parser/Lexer/Lexer.cpp +++ b/src/Parser/Lexer/Lexer.cpp @@ -22,9 +22,10 @@ namespace ElohimScript::Parser { } LexToken* Lexer::LexNext() { + auto start = _position; auto c = Consume(); switch (c) { - case u8'\0': return new LexTokenImpl(); + case u8'\0': return new LexTokenImpl(TextSpan(start, 1)); case u8'*': { auto n = Peek(); if (n == u8'*') { @@ -32,177 +33,219 @@ namespace ElohimScript::Parser { n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // **= + return new LexTokenImpl(TextSpan(start, 3)); } - return new LexTokenImpl(); + // ** + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // *= + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // * + return new LexTokenImpl(TextSpan(start, 1)); } case u8'/': if (Peek() == u8'=') { Progress(); - return new LexTokenImpl(); + // /= + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // / + return new LexTokenImpl(TextSpan(start, 1)); case u8'%': if (Peek() == u8'=') { Progress(); - return new LexTokenImpl(); + // %= + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // % + return new LexTokenImpl(TextSpan(start, 1)); case u8'+': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // += + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'+') { Progress(); - return new LexTokenImpl(); + // ++ + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // + + return new LexTokenImpl(TextSpan(start, 1)); } case u8'-': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // -= + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'-') { Progress(); - return new LexTokenImpl(); + // -- + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // - + return new LexTokenImpl(TextSpan(start, 1)); } case u8'<': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // <= + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'<') { Progress(); if (Peek() == u8'=') { Progress(); - return new LexTokenImpl(); + // <<= + return new LexTokenImpl(TextSpan(start, 3)); } - return new LexTokenImpl(); + // << + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // < + return new LexTokenImpl(TextSpan(start, 1)); } case u8'>': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // >= + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'>') { Progress(); n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // >>= + return new LexTokenImpl(TextSpan(start, 3)); } if (n == u8'>') { Progress(); if (Peek() == u8'=') { Progress(); - return new LexTokenImpl(); + // >>>= + return new LexTokenImpl( + TextSpan(start, 4)); } - return new LexTokenImpl(); + // >>> + return new LexTokenImpl( + TextSpan(start, 3)); } - return new LexTokenImpl(); + // >> + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // > + return new LexTokenImpl(TextSpan(start, 1)); } - case u8'(': return new LexTokenImpl(); - case u8')': return new LexTokenImpl(); + case u8'(': return new LexTokenImpl(TextSpan(start, 1)); + case u8')': return new LexTokenImpl(TextSpan(start, 1)); case u8'=': { if (Peek() == u8'=') { Progress(); - return new LexTokenImpl(); + // == + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // = + return new LexTokenImpl(TextSpan(start, 1)); } case u8'!': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // != + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'i' && Peek(2) == u8's') { Progress(2); - return new LexTokenImpl(); + // !is + return new LexTokenImpl(TextSpan(start, 3)); } - return new LexTokenImpl(); + // ! + return new LexTokenImpl(TextSpan(start, 1)); } - case u8'?': return new LexTokenImpl(); + case u8'?': return new LexTokenImpl(TextSpan(start, 1)); case u8':': { if (Peek() == u8':') { Progress(); - return new LexTokenImpl(); + // :: + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // : + return new LexTokenImpl(TextSpan(start, 1)); } case u8'&': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // &= + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'&') { Progress(); - return new LexTokenImpl(); + // && + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // & + return new LexTokenImpl(TextSpan(start, 1)); } - case u8',': return new LexTokenImpl(); - case u8'{': return new LexTokenImpl(); - case u8'}': return new LexTokenImpl(); - case u8';': return new LexTokenImpl(); + case u8',': return new LexTokenImpl(TextSpan(start, 1)); + case u8'{': return new LexTokenImpl(TextSpan(start, 1)); + case u8'}': return new LexTokenImpl(TextSpan(start, 1)); + case u8';': return new LexTokenImpl(TextSpan(start, 1)); case u8'|': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // |= + return new LexTokenImpl(TextSpan(start, 2)); } if (n == u8'|') { Progress(); - return new LexTokenImpl(); + // || + return new LexTokenImpl(TextSpan(start, 2)); } - return new LexTokenImpl(); + // | + return new LexTokenImpl(TextSpan(start, 1)); } case u8'^': { auto n = Peek(); if (n == u8'=') { Progress(); - return new LexTokenImpl(); + // ^= + return new LexTokenImpl(TextSpan(start, start + 2)); } if (n == u8'^') { Progress(); - return new LexTokenImpl(); + // ^^ + return new LexTokenImpl(TextSpan(start, start + 2)); } - return new LexTokenImpl(); + // ^ + return new LexTokenImpl(TextSpan(start, start + 1)); } - case u8'~': return new LexTokenImpl(); - case u8'.': return new LexTokenImpl(); - case u8'[': return new LexTokenImpl(); - case u8']': return new LexTokenImpl(); - case u8'@': return new LexTokenImpl(); + case u8'~': return new LexTokenImpl(TextSpan(start, start + 1)); + case u8'.': return new LexTokenImpl(TextSpan(start, start + 1)); + case u8'[': return new LexTokenImpl(TextSpan(start, start + 1)); + case u8']': return new LexTokenImpl(TextSpan(start, start + 1)); + case u8'@': return new LexTokenImpl(TextSpan(start, start + 1)); case u8' ': case u8'\r': case u8'\n': - case u8'\t': return new LexTokenImpl(); + case u8'\t': return new LexTokenImpl(TextSpan(start, start + 1)); // Byte order mark case u8'\xEF': { if (Peek() == u8'\xBB' && Peek(2) == u8'\xBF') { Progress(2); - return new LexTokenImpl(); + return new LexTokenImpl(TextSpan(start, start + 3)); } } case u8'0': @@ -226,8 +269,8 @@ namespace ElohimScript::Parser { default: if (IsAlphaNumericalOrUnderscore(c)) return LexKeywordOrIdentifier(); - // TODO: Log error - return new LexTokenImpl(); + _diagnostics->LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1)); + return new LexTokenImpl(TextSpan(start, start + 1)); } } @@ -248,7 +291,10 @@ namespace ElohimScript::Parser { ; case 'b': numericalSystem = 2; break; default: - // TODO: Log Invalid numerical system + _diagnostics->LogError(Diagnostics::DiagnosticType::InvalidNumericalBase, + TextSpan(_position - 1, _position + 1)); + // Set to the largest numerical system, so we can prevent errors down the line. + numericalSystem = 16; break; } } @@ -287,6 +333,7 @@ namespace ElohimScript::Parser { } LexToken* Lexer::LexDecimal(uint64_t initial) { + auto start = _position; uint64_t value = initial; uint64_t decimalValue = 0; uint64_t exponentValue = 0; @@ -327,12 +374,13 @@ namespace ElohimScript::Parser { if (isExponent) { val *= pow(10, exponentValue); } - return new FloatLiteral(val); + return new FloatLiteral(TextSpan(start, _position), val); } - return new IntegerLiteral(value); + return new IntegerLiteral(TextSpan(start, _position), value); } IntegerLiteral* Lexer::LexHexadecimal() { + auto start = _position; uint64_t value = 0; while (true) { auto v = LexHexadecimalValue(Peek()); @@ -343,9 +391,10 @@ namespace ElohimScript::Parser { value <<= 4; value += v; } - return new IntegerLiteral(value); + return new IntegerLiteral(TextSpan(start, _position), value); } IntegerLiteral* Lexer::LexOctal() { + auto start = _position; uint64_t value = 0; while (true) { auto v = LexOctalValue(Peek()); @@ -356,9 +405,10 @@ namespace ElohimScript::Parser { value <<= 3; value += v; } - return new IntegerLiteral(value); + return new IntegerLiteral(TextSpan(start, _position), value); } IntegerLiteral* Lexer::LexBinary() { + auto start = _position; uint64_t value = 0; while (true) { auto v = LexBinaryValue(Peek()); @@ -369,7 +419,7 @@ namespace ElohimScript::Parser { value <<= 1; value += v; } - return new IntegerLiteral(value); + return new IntegerLiteral(TextSpan(start, _position), value); } StringLiteral* Lexer::LexString(char8_t opening, bool heredoc) { Progress(); @@ -388,11 +438,13 @@ namespace ElohimScript::Parser { break; } if (current == u8'\0') { - // TODO: Log error + _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, + TextSpan(start, start + offset)); break; } if (!heredoc && (current == u8'\n' || current == u8'\r')) { - // TODO: log error + _diagnostics->LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, + TextSpan(start, start + offset)); break; } offset++; @@ -401,14 +453,15 @@ namespace ElohimScript::Parser { if (heredoc) { Progress(2); } - return new StringLiteral(std::u8string(_script.substr(start, offset))); + return new StringLiteral(TextSpan(start, start + _position), std::u8string(_script.substr(start, offset))); } static uint32_t constexpr Hash(const char8_t* input) { - return *input ? static_cast(*input) + 33 * Hash(input + 1) : 5381; + return *input != 0U ? static_cast(*input) + 33 * Hash(input + 1) : 5381; }; LexToken* Lexer::LexKeywordOrIdentifier() { + auto start = _position; auto offset = 0; while (IsAlphaNumericalOrUnderscore(Peek(offset))) { offset++; @@ -416,137 +469,75 @@ namespace ElohimScript::Parser { auto str = _script.substr(_position, offset); Progress(offset); switch (Hash(str.data())) { - case Hash(u8"and"): - return new LexTokenImpl(); - case Hash(u8"abstract"): - return new LexTokenImpl(); - case Hash(u8"auto"): - return new LexTokenImpl(); - case Hash(u8"bool"): - return new LexTokenImpl(); - case Hash(u8"break"): - return new LexTokenImpl(); - case Hash(u8"case"): - return new LexTokenImpl(); - case Hash(u8"cast"): - return new LexTokenImpl(); - case Hash(u8"catch"): - return new LexTokenImpl(); - case Hash(u8"class"): - return new LexTokenImpl(); - case Hash(u8"const"): - return new LexTokenImpl(); - case Hash(u8"continue"): - return new LexTokenImpl(); - case Hash(u8"default"): - return new LexTokenImpl(); - case Hash(u8"do"): - return new LexTokenImpl(); - case Hash(u8"double"): - return new LexTokenImpl(); - case Hash(u8"else"): - return new LexTokenImpl(); - case Hash(u8"enum"): - return new LexTokenImpl(); - case Hash(u8"explicit"): - return new LexTokenImpl(); - case Hash(u8"external"): - return new LexTokenImpl(); - case Hash(u8"false"): - return new LexTokenImpl(); - case Hash(u8"final"): - return new LexTokenImpl(); - case Hash(u8"float"): - return new LexTokenImpl(); - case Hash(u8"for"): - return new LexTokenImpl(); - case Hash(u8"from"): - return new LexTokenImpl(); - case Hash(u8"funcdef"): - return new LexTokenImpl(); - case Hash(u8"function"): - return new LexTokenImpl(); - case Hash(u8"get"): - return new LexTokenImpl(); - case Hash(u8"if"): - return new LexTokenImpl(); - case Hash(u8"import"): - return new LexTokenImpl(); - case Hash(u8"in"): - return new LexTokenImpl(); - case Hash(u8"inout"): - return new LexTokenImpl(); - case Hash(u8"int"): - return new LexTokenImpl(); + case Hash(u8"and"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"abstract"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"auto"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"bool"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"break"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"case"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"cast"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"catch"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"class"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"const"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"continue"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"default"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"do"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"double"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"else"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"enum"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"explicit"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"external"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"false"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"final"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"float"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"for"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"from"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"funcdef"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"function"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"get"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"if"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"import"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"in"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"inout"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"int"): return new LexTokenImpl(TextSpan(start, _position)); case Hash(u8"interface"): - return new LexTokenImpl(); - case Hash(u8"int8"): - return new LexTokenImpl(); - case Hash(u8"int16"): - return new LexTokenImpl(); - case Hash(u8"int32"): - return new LexTokenImpl(); - case Hash(u8"int64"): - return new LexTokenImpl(); - case Hash(u8"is"): - return new LexTokenImpl(); - case Hash(u8"mixin"): - return new LexTokenImpl(); + return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"int8"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"int16"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"int32"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"int64"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"is"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"mixin"): return new LexTokenImpl(TextSpan(start, _position)); case Hash(u8"namespace"): - return new LexTokenImpl(); - case Hash(u8"not"): - return new LexTokenImpl(); - case Hash(u8"null"): - return new LexTokenImpl(); - case Hash(u8"or"): - return new LexTokenImpl(); - case Hash(u8"out"): - return new LexTokenImpl(); - case Hash(u8"override"): - return new LexTokenImpl(); - case Hash(u8"private"): - return new LexTokenImpl(); - case Hash(u8"property"): - return new LexTokenImpl(); + return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"not"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"null"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"or"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"out"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"override"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"private"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"property"): return new LexTokenImpl(TextSpan(start, _position)); case Hash(u8"protected"): - return new LexTokenImpl(); - case Hash(u8"return"): - return new LexTokenImpl(); - case Hash(u8"set"): - return new LexTokenImpl(); - case Hash(u8"shared"): - return new LexTokenImpl(); - case Hash(u8"super"): - return new LexTokenImpl(); - case Hash(u8"switch"): - return new LexTokenImpl(); - case Hash(u8"this"): - return new LexTokenImpl(); - case Hash(u8"true"): - return new LexTokenImpl(); - case Hash(u8"try"): - return new LexTokenImpl(); - case Hash(u8"typedef"): - return new LexTokenImpl(); - case Hash(u8"uint"): - return new LexTokenImpl(); - case Hash(u8"uint8"): - return new LexTokenImpl(); - case Hash(u8"uint16"): - return new LexTokenImpl(); - case Hash(u8"uint32"): - return new LexTokenImpl(); - case Hash(u8"uint64"): - return new LexTokenImpl(); - case Hash(u8"void"): - return new LexTokenImpl(); - case Hash(u8"while"): - return new LexTokenImpl(); - case Hash(u8"xor"): - return new LexTokenImpl(); + return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"return"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"set"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"shared"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"super"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"switch"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"this"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"true"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"try"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"typedef"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"uint"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"uint8"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"uint16"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"uint32"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"uint64"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"void"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"while"): return new LexTokenImpl(TextSpan(start, _position)); + case Hash(u8"xor"): return new LexTokenImpl(TextSpan(start, _position)); - default: - return new IdentifierToken(std::u8string(str)); + default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str)); } } bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) { diff --git a/src/Parser/Lexer/Lexer.hpp b/src/Parser/Lexer/Lexer.hpp index 391bc6a..1a4790f 100644 --- a/src/Parser/Lexer/Lexer.hpp +++ b/src/Parser/Lexer/Lexer.hpp @@ -2,19 +2,22 @@ #define ELOHIMSCRIPT_LEXER_HPP #include +#include "../../Diagnostics/Diagnostics.hpp" #include "LexToken.hpp" namespace ElohimScript::Parser { class Lexer { public: - Lexer(const char* script) : _script(reinterpret_cast(script)) {} - Lexer(const char8_t* script) : _script(script) {} - Lexer(std::u8string_view script) : _script(script) {} + Lexer(const char* script, Diagnostics::Diagnostics* diag) + : Lexer(reinterpret_cast(script), diag) {} + Lexer(const char8_t* script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {} + Lexer(std::u8string_view script, Diagnostics::Diagnostics* diag) : _script(script), _diagnostics(diag) {} const LexToken* Lex(); private: std::u8string_view _script; size_t _position = -1; + Diagnostics::Diagnostics* _diagnostics; inline char8_t Consume() { if (++_position >= _script.size()) { @@ -23,9 +26,7 @@ namespace ElohimScript::Parser { return _script[_position]; } - inline void Progress(size_t steps = 1){ - _position += steps; - } + inline void Progress(size_t steps = 1) { _position += steps; } inline char8_t Peek(size_t offset = 1) { auto pos = _position + offset; diff --git a/src/Parser/TextSpan.hpp b/src/Parser/TextSpan.hpp new file mode 100644 index 0000000..975de02 --- /dev/null +++ b/src/Parser/TextSpan.hpp @@ -0,0 +1,19 @@ +#ifndef ELOHIMSCRIPT_TEXTSPAN_HPP +#define ELOHIMSCRIPT_TEXTSPAN_HPP + +#include +namespace ElohimScript { + class TextSpan { + size_t _start; + size_t _end; + + public: + inline TextSpan(size_t start, size_t end) : _start(start), _end(end) {} + [[nodiscard]] inline size_t GetStart() const noexcept { return _start; } + [[nodiscard]] inline size_t GetEnd() const noexcept { return _end; } + inline bool operator==(const TextSpan& rhs) const { return _start == rhs._start && _end == rhs._end; } + inline bool operator!=(const TextSpan& rhs) const { return !(rhs == *this); } + }; +} + +#endif // ELOHIMSCRIPT_TEXTSPAN_HPP diff --git a/tests/LexerTests/IdentifierLexTests.cpp b/tests/LexerTests/IdentifierLexTests.cpp index 491a48f..921cb3e 100644 --- a/tests/LexerTests/IdentifierLexTests.cpp +++ b/tests/LexerTests/IdentifierLexTests.cpp @@ -5,8 +5,10 @@ using namespace ElohimScript::Parser; #define KEYWORD_TEST(script, symbol) \ TEST_CASE("Lex " script) { \ - auto lexer = Lexer(script); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(script, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ CHECK(token->GetKind() == LexTokenKind::symbol); \ CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \ delete token; \ @@ -91,8 +93,10 @@ namespace doctest { #define IDENTIFIER_TEST(identifier) \ TEST_CASE("Lex identifier " identifier) { \ - auto lexer = Lexer(identifier); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(identifier, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ REQUIRE(token->GetKind() == LexTokenKind::Identifier); \ auto value = ((IdentifierToken*)token)->GetValue(); \ CHECK(value == std::u8string(reinterpret_cast(identifier))); \ diff --git a/tests/LexerTests/NumericalLexTests.cpp b/tests/LexerTests/NumericalLexTests.cpp index d2ffcaf..028cac4 100644 --- a/tests/LexerTests/NumericalLexTests.cpp +++ b/tests/LexerTests/NumericalLexTests.cpp @@ -5,8 +5,10 @@ using namespace ElohimScript::Parser; #define INTEGER_TEST(script, expected) \ TEST_CASE("Lex " script) { \ - auto lexer = Lexer(script); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(script, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ REQUIRE(token->GetKind() == LexTokenKind::IntegerLiteral); \ auto value = ((const IntegerLiteral*)token)->GetValue(); \ CHECK(value == (expected)); \ @@ -16,8 +18,10 @@ using namespace ElohimScript::Parser; #define FLOAT_TEST(script, expected) \ TEST_CASE("Lex " script) { \ - auto lexer = Lexer(script); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(script, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ REQUIRE(token->GetKind() == LexTokenKind::FloatLiteral); \ auto value = ((const FloatLiteral*)token)->GetValue(); \ CHECK(value == (expected)); \ @@ -63,4 +67,16 @@ INTEGER_TEST("0b1111", 15); INTEGER_TEST("0b110011", 51); #undef INTEGER_TEST -#undef FLOAT_TEST \ No newline at end of file +#undef FLOAT_TEST + +TEST_CASE("Lex invalid numerical base") { + ElohimScript::Diagnostics::Diagnostics diag; + auto lexer = Lexer("0f553", &diag); + const auto* token = lexer.Lex(); + const auto& messages = diag.GetMessages(); + REQUIRE(messages.size() == 1); + CHECK(messages[0].GetType() == ElohimScript::Diagnostics::DiagnosticType::InvalidNumericalBase); + CHECK(messages[0].GetLevel() == ElohimScript::Diagnostics::DiagnosticLevel::Error); + CHECK(messages[0].GetSpan() == ElohimScript::TextSpan(0, 2)); + delete token; +} diff --git a/tests/LexerTests/StringLexTests.cpp b/tests/LexerTests/StringLexTests.cpp index e1a8a47..3df238f 100644 --- a/tests/LexerTests/StringLexTests.cpp +++ b/tests/LexerTests/StringLexTests.cpp @@ -5,8 +5,10 @@ using namespace ElohimScript::Parser; #define STRING_TEST(str, constraint) \ TEST_CASE("Lex string " constraint str constraint) { \ - auto lexer = Lexer(constraint str constraint); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(constraint str constraint, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); \ auto value = ((const StringLiteral*)token)->GetValue(); \ CHECK(value == std::u8string(reinterpret_cast(str))); \ @@ -21,9 +23,12 @@ STRING_TEST("\"foo bar\"", "\"\"\""); STRING_TEST("\"\"foo bar\"\"", "\"\"\""); TEST_CASE("Lex multiline string") { + ElohimScript::Diagnostics::Diagnostics diag; auto lexer = Lexer(R"("""foo -bar""")"); +bar""")", + &diag); const auto* token = lexer.Lex(); + CHECK(diag.GetMessages().empty()); REQUIRE(token->GetKind() == LexTokenKind::StringLiteral); auto value = (dynamic_cast(token))->GetValue(); CHECK(value == std::u8string(reinterpret_cast(R"(foo diff --git a/tests/LexerTests/SymbolLexTests.cpp b/tests/LexerTests/SymbolLexTests.cpp index 0d61e27..fecf65f 100644 --- a/tests/LexerTests/SymbolLexTests.cpp +++ b/tests/LexerTests/SymbolLexTests.cpp @@ -6,8 +6,10 @@ using namespace ElohimScript::Parser; #define SYMBOL_TEST(script, symbol) \ TEST_CASE("Lex " script) { \ - auto lexer = Lexer(script); \ + ElohimScript::Diagnostics::Diagnostics diag; \ + auto lexer = Lexer(script, &diag); \ const auto* token = lexer.Lex(); \ + CHECK(diag.GetMessages().empty()); \ CHECK(token->GetKind() == LexTokenKind::symbol); \ CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); \ delete token; \ @@ -71,9 +73,11 @@ SYMBOL_TEST(" ", Whitespace) TEST_CASE("Lex whitespace") { auto whitespace = {" ", "\t", "\n", "\r", "\xef\xbb\xbf"}; - for (auto v : whitespace) { - auto lexer = Lexer(v); + for (const auto *v : whitespace) { + ElohimScript::Diagnostics::Diagnostics diag; + auto lexer = Lexer(v, &diag); const auto* token = lexer.Lex(); + CHECK(diag.GetMessages().empty()); CHECK(token->GetKind() == LexTokenKind::Whitespace); CHECK(token->GetNext()->GetKind() == LexTokenKind::EndOfFile); delete token;