From 469c708788436136646cdfbab7ff8c4d97347374 Mon Sep 17 00:00:00 2001
From: Deukhoofd
Date: Sun, 4 Oct 2020 21:05:51 +0200
Subject: [PATCH] Fixes bug in identifier/keyword parsing, adds lexer integration tests.

---
Review notes (this area is ignored when the patch is applied and is not part
of the commit message):

* This copy was rebuilt from a whitespace-collapsed paste. Line breaks,
  indentation and the position of blank context lines are reconstructed and
  may not match the target tree. Template arguments in angle brackets (for
  example on LexTokenImpl) and the author address appear to have been
  stripped before this copy was made; they are left as found. Regenerate the
  patch with git format-patch before applying it.
* LEX_TEST differs from commit 469c708: the token walk is now bounds-checked,
  a chain that is longer or shorter than the expected list fails the test,
  the index is a std::size_t, and <cstddef>/<vector> are included directly.
  The new-file hunk size and the diffstat below are updated to match; the
  blob ids on the index lines are still those of the original commit.

 src/Parser/Lexer/Lexer.cpp                 |  8 ++++----
 tests/LexerTests/LexerIntegrationTests.cpp | 47 ++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 tests/LexerTests/LexerIntegrationTests.cpp

diff --git a/src/Parser/Lexer/Lexer.cpp b/src/Parser/Lexer/Lexer.cpp
index 29b890d..543c737 100644
--- a/src/Parser/Lexer/Lexer.cpp
+++ b/src/Parser/Lexer/Lexer.cpp
@@ -466,9 +466,9 @@ namespace ElohimScript::Parser {
         while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
             offset++;
         }
-        auto str = _script.substr(_position, offset);
-        Progress(offset);
-        switch (Hash(str.data())) {
+        auto str = std::u8string(_script.substr(start, offset));
+        Progress(offset - 1);
+        switch (Hash(str.c_str())) {
             case Hash(u8"and"): return new LexTokenImpl(TextSpan(start, _position));
             case Hash(u8"abstract"): return new LexTokenImpl(TextSpan(start, _position));
             case Hash(u8"auto"): return new LexTokenImpl(TextSpan(start, _position));
@@ -537,7 +537,7 @@ namespace ElohimScript::Parser {
             case Hash(u8"while"): return new LexTokenImpl(TextSpan(start, _position));
             case Hash(u8"xor"): return new LexTokenImpl(TextSpan(start, _position));
 
-            default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str));
+            default: return new IdentifierToken(TextSpan(start, _position), str);
         }
     }
     bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {
diff --git a/tests/LexerTests/LexerIntegrationTests.cpp b/tests/LexerTests/LexerIntegrationTests.cpp
new file mode 100644
index 0000000..0099c2a
--- /dev/null
+++ b/tests/LexerTests/LexerIntegrationTests.cpp
@@ -0,0 +1,47 @@
+#include <cstddef>
+#include <vector>
+
+#include "../../extern/doctest.hpp"
+#include "../../src/Parser/Lexer/Lexer.hpp"
+
+using namespace ElohimScript::Parser;
+
+// Declares a doctest case named "Lex: <script>" that lexes `script` and checks
+// the resulting token chain against the expected kinds.
+//
+//   script - string literal holding the source text; it is pasted after "Lex: "
+//            to form the test name, so it must be a literal.
+//   ...    - expected LexTokenKind values in order; LexTokenKind::EndOfFile is
+//            appended automatically.
+//
+// The walk stops when either the chain or the expected list is exhausted, so a
+// chain that is too long can never index past the end of `vec`. The two final
+// checks then fail the test if the chain was longer or shorter than expected.
+// NOTE(review): deleting the head token presumably releases the whole chain
+// (GetNext() is accessed through .get()) - confirm against LexToken.
+#define LEX_TEST(script, ...) \
+    TEST_CASE("Lex: " script) { \
+        ElohimScript::Diagnostics::Diagnostics diag; \
+        auto lexer = Lexer(script, &diag); \
+        const auto* token = lexer.Lex(); \
+        CHECK(diag.GetMessages().empty()); \
+        std::vector vec = {__VA_ARGS__, LexTokenKind::EndOfFile}; \
+        const auto* current = token; \
+        std::size_t pos = 0; \
+        while (current != nullptr && pos < vec.size()) { \
+            CHECK_MESSAGE(current->GetKind() == vec[pos], "position: " << pos); \
+            pos++; \
+            current = current->GetNext().get(); \
+        } \
+        const bool chainConsumed = current == nullptr; \
+        const bool expectedConsumed = pos == vec.size(); \
+        CHECK_MESSAGE(chainConsumed, "lexer produced more tokens than expected"); \
+        CHECK_MESSAGE(expectedConsumed, "lexer produced fewer tokens than expected"); \
+        delete token; \
+    }
+
+LEX_TEST("1 + 1", LexTokenKind::IntegerLiteral, LexTokenKind::Whitespace, LexTokenKind::PlusSymbol,
+         LexTokenKind::Whitespace, LexTokenKind::IntegerLiteral);
+
+LEX_TEST("private foo = \"foobar\"", LexTokenKind::PrivateKeyword, LexTokenKind::Whitespace, LexTokenKind::Identifier,
+         LexTokenKind::Whitespace, LexTokenKind::EqualsSymbol, LexTokenKind::Whitespace, LexTokenKind::StringLiteral);
\ No newline at end of file