From 807fe6382889ca97af34b07f397b4055bd6ff052 Mon Sep 17 00:00:00 2001 From: Deukhoofd Date: Wed, 6 Jan 2021 00:25:08 +0100 Subject: [PATCH] More detailed error messages for lex errors. --- src/Diagnostics/DiagnosticType.hpp | 2 +- src/Diagnostics/DiagnosticTypeEN_US.hpp | 9 ++++++--- src/Diagnostics/Logger.hpp | 5 +++-- src/Parser/Lexer/Lexer.cpp | 14 +++++++++----- src/Parser/Lexer/Lexer.hpp | 4 ++-- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/Diagnostics/DiagnosticType.hpp b/src/Diagnostics/DiagnosticType.hpp index cbf6c5a..aaf3843 100644 --- a/src/Diagnostics/DiagnosticType.hpp +++ b/src/Diagnostics/DiagnosticType.hpp @@ -4,7 +4,7 @@ namespace MalachScript::Diagnostics { enum class DiagnosticType : uint8_t { - UnknownToken, + UnknownCharacter, InvalidNumericalBase, ExpectedEndOfString, UnexpectedToken, diff --git a/src/Diagnostics/DiagnosticTypeEN_US.hpp b/src/Diagnostics/DiagnosticTypeEN_US.hpp index 8940ddd..84ef84f 100644 --- a/src/Diagnostics/DiagnosticTypeEN_US.hpp +++ b/src/Diagnostics/DiagnosticTypeEN_US.hpp @@ -9,9 +9,12 @@ namespace MalachScript::Diagnostics { public: static std::string ToEnglishString(const Diagnostic* diag) { switch (diag->GetType()) { - case DiagnosticType::UnknownToken: return "Unknown token"; - case DiagnosticType::InvalidNumericalBase: return "Invalid numerical base"; - case DiagnosticType::ExpectedEndOfString: return "Expected end of string"; + case DiagnosticType::UnknownCharacter: + return util::Format("Unknown character: '{0}'", diag->GetFormats()); + case DiagnosticType::InvalidNumericalBase: + return util::Format("Invalid numerical base: '0{0}'", diag->GetFormats()); + case DiagnosticType::ExpectedEndOfString: + return util::Format("Expected end of string, found {0}", diag->GetFormats()); case DiagnosticType::UnexpectedToken: return util::Format("Unexpected Token. Expected any of: {0}, but found {1}", diag->GetFormats()); case DiagnosticType::DoubleProperty: return "Property block found twice."; diff --git a/src/Diagnostics/Logger.hpp b/src/Diagnostics/Logger.hpp index 9287826..b925593 100644 --- a/src/Diagnostics/Logger.hpp +++ b/src/Diagnostics/Logger.hpp @@ -22,8 +22,9 @@ namespace MalachScript::Diagnostics { inline void LogWarning(DiagnosticType type, std::u8string_view scriptName, TextSpan span) { Log(DiagnosticLevel::Warning, type, scriptName, span); } - inline void LogError(DiagnosticType type, std::u8string_view scriptName, TextSpan span) { - Log(DiagnosticLevel::Error, type, scriptName, span); + inline void LogError(DiagnosticType type, std::u8string_view scriptName, TextSpan span, + const std::vector& formats = {}) { + Log(DiagnosticLevel::Error, type, scriptName, span, formats); } inline void LogCritical(DiagnosticType type, std::u8string_view scriptName, TextSpan span) { Log(DiagnosticLevel::Critical, type, scriptName, span); diff --git a/src/Parser/Lexer/Lexer.cpp b/src/Parser/Lexer/Lexer.cpp index 6209852..6365010 100644 --- a/src/Parser/Lexer/Lexer.cpp +++ b/src/Parser/Lexer/Lexer.cpp @@ -254,7 +254,8 @@ namespace MalachScript::Parser { Progress(2); return Create>(TextSpan(start, start + 3)); } - LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1)); + LogError(Diagnostics::DiagnosticType::UnknownCharacter, TextSpan(start, start + 1), + {std::string(1, c)}); return Create>(TextSpan(start, start + 1)); } case u8'0': @@ -278,7 +279,8 @@ namespace MalachScript::Parser { default: if (IsAlphaNumericalOrUnderscore(c)) return LexKeywordOrIdentifier(); - LogError(Diagnostics::DiagnosticType::UnknownToken, TextSpan(start, start + 1)); + LogError(Diagnostics::DiagnosticType::UnknownCharacter, TextSpan(start, start + 1), + {std::string(1, (char)c)}); return Create>(TextSpan(start, start + 1)); } } @@ -302,7 +304,7 @@ namespace MalachScript::Parser { case 'B': numericalSystem = 2; break; default: LogError(Diagnostics::DiagnosticType::InvalidNumericalBase, - TextSpan(_position - 1, _position + 1)); + TextSpan(_position - 1, _position + 1), {std::string(1, secondChar)}); // Set to the largest numerical system, so we can prevent errors down the line. numericalSystem = 16; break; @@ -449,11 +451,13 @@ namespace MalachScript::Parser { break; } if (current == u8'\0') { - LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset)); + LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset), + {"EndOfFile"}); break; } if (!heredoc && (current == u8'\n' || current == u8'\r')) { - LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset)); + LogError(Diagnostics::DiagnosticType::ExpectedEndOfString, TextSpan(start, start + offset), + {"Newline"}); break; } offset++; diff --git a/src/Parser/Lexer/Lexer.hpp b/src/Parser/Lexer/Lexer.hpp index acb241a..782da5b 100644 --- a/src/Parser/Lexer/Lexer.hpp +++ b/src/Parser/Lexer/Lexer.hpp @@ -57,8 +57,8 @@ namespace MalachScript::Parser { return _allocator.Create(args...); } - inline void LogError(Diagnostics::DiagnosticType type, TextSpan span) { - _diagnostics->LogError(type, _scriptName, span); + inline void LogError(Diagnostics::DiagnosticType type, TextSpan span, const std::vector& formats) { + _diagnostics->LogError(type, _scriptName, span, formats); } }; }