From ae255988644d2eaa55696939dfe98607fd9df6f9 Mon Sep 17 00:00:00 2001
From: Deukhoofd <Deukhoofd@gmail.com>
Date: Tue, 21 May 2019 15:11:00 +0200
Subject: [PATCH] Expand diagnostics and make the lexer ignore whitespace completely

---
 CMakeLists.txt                          |  2 +-
 integration_tests/integration_tests.cpp | 27 +++++++++++++++++++++++++
 src/Diagnostics/Diagnostic.hpp          | 15 ++++++++++++++
 src/Diagnostics/Diagnostics.hpp         |  8 ++++++++
 src/Parser/Lexer.cpp                    |  6 ++++--
 src/Parser/LexerTests.cpp               | 27 +++++++++----------------
 src/Parser/Parser.cpp                   |  2 ++
 7 files changed, 67 insertions(+), 20 deletions(-)
 create mode 100644 integration_tests/integration_tests.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f65143..c4f2613 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,7 @@ add_library(PorygonLang ${SRC_FILES})
 add_executable(PorygonLangTests
         ${SRC_FILES}
         src/Parser/LexerTests.cpp
-    )
+        integration_tests/integration_tests.cpp)
 
 target_compile_definitions(PorygonLangTests PRIVATE TESTS_BUILD)
 
diff --git a/integration_tests/integration_tests.cpp b/integration_tests/integration_tests.cpp
new file mode 100644
index 0000000..977faa0
--- /dev/null
+++ b/integration_tests/integration_tests.cpp
@@ -0,0 +1,27 @@
+#ifdef TESTS_BUILD
+#define CATCH_CONFIG_MAIN
+#include <catch.hpp>
+#include "../src/Script.hpp"
+
+TEST_CASE( "Diagnostic invalid character", "[integration]" ) {
+    Script compiled = Script::Create("1 + 1 @");  // '@' sits at offset 6 of the input
+    REQUIRE(compiled.Diagnostics->HasErrors());
+    auto reported = compiled.Diagnostics->GetDiagnostics();
+    REQUIRE(reported.size() == 1);  // hard stop: the checks below read the first entry
+    CHECK(reported.front().GetCode() == DiagnosticCode::UnexpectedCharacter);
+    CHECK(reported.front().GetStartPosition() == 6);
+    CHECK(reported.front().GetLength() == 1);
+}
+
+TEST_CASE( "Diagnostic invalid token", "[integration]" ) {
+    Script compiled = Script::Create("1 +/ 1");  // '/' sits at offset 3, directly after '+'
+    REQUIRE(compiled.Diagnostics->HasErrors());
+    auto reported = compiled.Diagnostics->GetDiagnostics();
+    REQUIRE(reported.size() == 1);  // hard stop: the checks below read the first entry
+    CHECK(reported.front().GetCode() == DiagnosticCode::UnexpectedToken);
+    CHECK(reported.front().GetStartPosition() == 3);
+    CHECK(reported.front().GetLength() == 1);
+}
+
+
+#endif
\ No newline at end of file
diff --git a/src/Diagnostics/Diagnostic.hpp b/src/Diagnostics/Diagnostic.hpp
index 9b1b50b..43a6da4 100644
--- a/src/Diagnostics/Diagnostic.hpp
+++ b/src/Diagnostics/Diagnostic.hpp
@@ -17,6 +17,21 @@ public:
         _start = start;
         _length = length;
     }
+
+    DiagnosticSeverity GetSeverity() const { // read-only accessor: returns the stored severity
+        return _severity;
+    }
+    DiagnosticCode GetCode() const { // read-only accessor: returns the stored diagnostic code
+        return _code;
+    }
+
+    unsigned int GetStartPosition() const { // read-only accessor: returns the start value given to the constructor
+        return _start;
+    }
+
+    unsigned int GetLength() const { // read-only accessor: returns the length value given to the constructor
+        return _length;
+    }
 };
 
 #endif //PORYGONLANG_DIAGNOSTIC_HPP
diff --git a/src/Diagnostics/Diagnostics.hpp b/src/Diagnostics/Diagnostics.hpp
index 4d53a42..d1285db 100644
--- a/src/Diagnostics/Diagnostics.hpp
+++ b/src/Diagnostics/Diagnostics.hpp
@@ -17,6 +17,10 @@ public:
         _hasErrors = false;
     }
 
+    // _diagnostics releases its own elements and storage when it is destroyed,
+    // so no manual clear() is needed; the compiler-generated destructor suffices.
+    ~Diagnostics() = default;
+
     void Log(DiagnosticSeverity severity, DiagnosticCode code, unsigned int start, unsigned int length){
         _diagnostics.emplace_back(severity, code, start, length);
         if (severity >= DiagnosticSeverity::Error){
@@ -38,6 +42,10 @@ public:
     bool HasErrors(){
         return _hasErrors;
     }
+
+    const vector<Diagnostic>& GetDiagnostics() const { // read-only view of the logged diagnostics; no copy unless the caller makes one
+        return _diagnostics; // the reference may be invalidated by a later Log() call that grows the list
+    }
 };
 
 
diff --git a/src/Parser/Lexer.cpp b/src/Parser/Lexer.cpp
index e765ca4..7a272e9 100644
--- a/src/Parser/Lexer.cpp
+++ b/src/Parser/Lexer.cpp
@@ -13,8 +13,10 @@ vector<IToken*> Lexer::Lex() {
     vector<IToken*> tokens;
     while (true){
         IToken* next = this -> LexNext(this -> Next());
-        tokens.push_back(next);
-        if (next->GetKind() == TokenKind::EndOfFile)
+        auto nextKind = next -> GetKind();
+        if (nextKind != TokenKind::WhiteSpace)
+            tokens.push_back(next);
+        if (nextKind == TokenKind::EndOfFile)
             break;
     }
     return tokens;
diff --git a/src/Parser/LexerTests.cpp b/src/Parser/LexerTests.cpp
index 72e1e44..4e14dd4 100644
--- a/src/Parser/LexerTests.cpp
+++ b/src/Parser/LexerTests.cpp
@@ -1,5 +1,4 @@
 #ifdef TESTS_BUILD
-#define CATCH_CONFIG_MAIN
 #include <catch.hpp>
 #include "Lexer.hpp"
 
@@ -259,28 +258,22 @@ TEST_CASE( "Lex identifier", "[lexer]" ) {
 TEST_CASE( "Lex Start Position", "[lexer]" ) {
     Lexer lexer = Lexer("+ - bar 1234", nullptr);
     auto tokens = lexer.Lex();
-    REQUIRE(tokens.size() == 8);
+    REQUIRE(tokens.size() == 5); // whitespace is no longer emitted: '+', '-', 'bar', '1234', end-of-file
     CHECK(((IdentifierToken*)tokens[0]) -> GetStartPosition() == 0);
-    CHECK(((IdentifierToken*)tokens[1]) -> GetStartPosition() == 1);
-    CHECK(((IdentifierToken*)tokens[2]) -> GetStartPosition() == 2);
-    CHECK(((IdentifierToken*)tokens[3]) -> GetStartPosition() == 3);
-    CHECK(((IdentifierToken*)tokens[4]) -> GetStartPosition() == 4);
-    CHECK(((IdentifierToken*)tokens[5]) -> GetStartPosition() == 7);
-    CHECK(((IdentifierToken*)tokens[6]) -> GetStartPosition() == 8);
-    CHECK(((IdentifierToken*)tokens[7]) -> GetStartPosition() == 12);
+    CHECK(((IdentifierToken*)tokens[1]) -> GetStartPosition() == 2); // '-'
+    CHECK(((IdentifierToken*)tokens[2]) -> GetStartPosition() == 4); // 'bar'
+    CHECK(((IdentifierToken*)tokens[3]) -> GetStartPosition() == 8); // '1234'
+    CHECK(((IdentifierToken*)tokens[4]) -> GetStartPosition() == 12); // end-of-file token, one past the last input character
 }
 
 TEST_CASE( "Lex End Position", "[lexer]" ) {
     Lexer lexer = Lexer("+ - bar 1234", nullptr);
     auto tokens = lexer.Lex();
-    REQUIRE(tokens.size() == 8);
+    REQUIRE(tokens.size() == 5); // whitespace is no longer emitted: '+', '-', 'bar', '1234', end-of-file
     CHECK(((IdentifierToken*)tokens[0]) -> GetEndPosition() == 0);
-    CHECK(((IdentifierToken*)tokens[1]) -> GetEndPosition() == 1);
-    CHECK(((IdentifierToken*)tokens[2]) -> GetEndPosition() == 2);
-    CHECK(((IdentifierToken*)tokens[3]) -> GetEndPosition() == 3);
-    CHECK(((IdentifierToken*)tokens[4]) -> GetEndPosition() == 6);
-    CHECK(((IdentifierToken*)tokens[5]) -> GetEndPosition() == 7);
-    CHECK(((IdentifierToken*)tokens[6]) -> GetEndPosition() == 11);
-    CHECK(((IdentifierToken*)tokens[7]) -> GetEndPosition() == 12);
+    CHECK(((IdentifierToken*)tokens[1]) -> GetEndPosition() == 2); // '-'
+    CHECK(((IdentifierToken*)tokens[2]) -> GetEndPosition() == 6); // 'bar' spans offsets 4..6
+    CHECK(((IdentifierToken*)tokens[3]) -> GetEndPosition() == 11); // '1234' spans offsets 8..11
+    CHECK(((IdentifierToken*)tokens[4]) -> GetEndPosition() == 12); // end-of-file token, one past the last input character
 }
 #endif
\ No newline at end of file
diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp
index f8a77e5..1b98f18 100644
--- a/src/Parser/Parser.cpp
+++ b/src/Parser/Parser.cpp
@@ -110,6 +110,8 @@ ParsedExpression *Parser::ParsePrimaryExpression(IToken *current) {
         case TokenKind ::Float: return new LiteralFloatExpression((FloatToken*)current);
         case TokenKind ::TrueKeyword: return new LiteralBoolExpression(current);
         case TokenKind ::FalseKeyword: return new LiteralBoolExpression(current);
+        // If we find a bad token here, we should have already logged it in the lexer, so don't log another error.
+        case TokenKind ::BadToken: return new BadExpression(current->GetStartPosition(), current->GetLength());
         default:
             this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedToken, current->GetStartPosition(), current->GetLength());
             return new BadExpression(current->GetStartPosition(), current->GetLength());