From 469c708788436136646cdfbab7ff8c4d97347374 Mon Sep 17 00:00:00 2001
From: Deukhoofd <Deukhoofd@gmail.com>
Date: Sun, 4 Oct 2020 21:05:51 +0200
Subject: [PATCH] Fixes bug in identifier/keyword parsing, adds lexer
 integration tests.

---
 src/Parser/Lexer/Lexer.cpp                 |  8 +++----
 tests/LexerTests/LexerIntegrationTests.cpp | 27 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 tests/LexerTests/LexerIntegrationTests.cpp
diff --git a/src/Parser/Lexer/Lexer.cpp b/src/Parser/Lexer/Lexer.cpp
index 29b890d..543c737 100644
--- a/src/Parser/Lexer/Lexer.cpp
+++ b/src/Parser/Lexer/Lexer.cpp
@@ -466,9 +466,9 @@ namespace ElohimScript::Parser {
         while (IsAlphaNumericalOrUnderscore(Peek(offset))) {
             offset++;
         }
-        auto str = _script.substr(_position, offset);
-        Progress(offset);
-        switch (Hash(str.data())) {
+        auto str = std::u8string(_script.substr(start, offset));
+        Progress(offset - 1);
+        switch (Hash(str.c_str())) {
             case Hash(u8"and"): return new LexTokenImpl<LexTokenKind::AndKeyword>(TextSpan(start, _position));
             case Hash(u8"abstract"): return new LexTokenImpl<LexTokenKind::AbstractKeyword>(TextSpan(start, _position));
             case Hash(u8"auto"): return new LexTokenImpl<LexTokenKind::AutoKeyword>(TextSpan(start, _position));
@@ -537,7 +537,7 @@ namespace ElohimScript::Parser {
             case Hash(u8"while"): return new LexTokenImpl<LexTokenKind::WhileKeyword>(TextSpan(start, _position));
             case Hash(u8"xor"): return new LexTokenImpl<LexTokenKind::XorKeyword>(TextSpan(start, _position));
 
-            default: return new IdentifierToken(TextSpan(start, _position), std::u8string(str));
+            default: return new IdentifierToken(TextSpan(start, _position), str);
         }
     }
     bool Lexer::IsAlphaNumericalOrUnderscore(char8_t c) {
diff --git a/tests/LexerTests/LexerIntegrationTests.cpp b/tests/LexerTests/LexerIntegrationTests.cpp
new file mode 100644
index 0000000..0099c2a
--- /dev/null
+++ b/tests/LexerTests/LexerIntegrationTests.cpp
@@ -0,0 +1,42 @@
+#include "../../extern/doctest.hpp"
+#include "../../src/Parser/Lexer/Lexer.hpp"
+
+#include <cstddef>
+#include <vector>
+
+using namespace ElohimScript::Parser;
+
+// Declares a doctest TEST_CASE that lexes `script` and verifies the kinds of the resulting token chain.
+//
+//   script       string literal to lex; it is also concatenated into the test case name.
+//   __VA_ARGS__  expected LexTokenKind values in order, excluding the final token: the macro appends
+//                LexTokenKind::EndOfFile to the expectation itself.
+//
+// The case fails when diag.GetMessages() is non-empty after lexing, when a token kind differs from the
+// expectation at the same position, or when the chain is longer or shorter than the expectation.
+// Only non-fatal CHECK macros are used so a failed check does not skip the final `delete token`.
+#define LEX_TEST(script, ...)                                                                                          \
+    TEST_CASE("Lex: " script) {                                                                                        \
+        ElohimScript::Diagnostics::Diagnostics diag;                                                                   \
+        auto lexer = Lexer(script, &diag);                                                                             \
+        const auto* token = lexer.Lex();                                                                               \
+        CHECK(diag.GetMessages().empty());                                                                             \
+        const std::vector<LexTokenKind> expected = {__VA_ARGS__, LexTokenKind::EndOfFile};                             \
+        const auto* current = token;                                                                                   \
+        std::size_t pos = 0;                                                                                           \
+        /* Bounded by expected.size() so expected[pos] is never read out of range. */                                  \
+        while (current != nullptr && pos < expected.size()) {                                                          \
+            CHECK_MESSAGE(current->GetKind() == expected[pos], "position: " << pos);                                   \
+            pos++;                                                                                                     \
+            current = current->GetNext().get();                                                                        \
+        }                                                                                                              \
+        CHECK_MESSAGE(current == nullptr, "lexer produced more tokens than expected");                                 \
+        CHECK_MESSAGE(pos == expected.size(), "lexer produced fewer tokens than expected");                            \
+        delete token;                                                                                                  \
+    }
+
+LEX_TEST("1 + 1", LexTokenKind::IntegerLiteral, LexTokenKind::Whitespace, LexTokenKind::PlusSymbol,
+         LexTokenKind::Whitespace, LexTokenKind::IntegerLiteral);
+
+LEX_TEST("private foo = \"foobar\"", LexTokenKind::PrivateKeyword, LexTokenKind::Whitespace, LexTokenKind::Identifier,
+         LexTokenKind::Whitespace, LexTokenKind::EqualsSymbol, LexTokenKind::Whitespace, LexTokenKind::StringLiteral);
\ No newline at end of file