From 7edd51d5e3c8f5a25899ecfd72219bf32ba00bf3 Mon Sep 17 00:00:00 2001
From: Deukhoofd <Deukhoofd@gmail.com>
Date: Sun, 19 May 2019 16:11:16 +0200
Subject: [PATCH] Save position and length of tokens

---
 src/Parser/Lexer.cpp      | 66 +++++++++++++++++++++------------------
 src/Parser/LexerTests.cpp | 30 +++++++++++++++++-
 src/Parser/Token.hpp      | 23 +++++++++++---
 3 files changed, 84 insertions(+), 35 deletions(-)
diff --git a/src/Parser/Lexer.cpp b/src/Parser/Lexer.cpp
index 279ccc9..907eb13 100644
--- a/src/Parser/Lexer.cpp
+++ b/src/Parser/Lexer.cpp
@@ -34,23 +34,23 @@ char Lexer::Next(){
 IToken* Lexer::LexNext(char c){
     switch (c) {
         case '\0':
-            return new SimpleToken(TokenKind::EndOfFile);
+            return new SimpleToken(TokenKind::EndOfFile, Lexer::Position - 1, 1);
             case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
-            return new SimpleToken(TokenKind::WhiteSpace);
+            return new SimpleToken(TokenKind::WhiteSpace, Lexer::Position - 1, 1);
         case '+':
-            return new SimpleToken(TokenKind::PlusToken);
+            return new SimpleToken(TokenKind::PlusToken, Lexer::Position - 1, 1);
         case '-':
-            return new SimpleToken(TokenKind::MinusToken);
+            return new SimpleToken(TokenKind::MinusToken, Lexer::Position - 1, 1);
         case '/':
-            return new SimpleToken(TokenKind::SlashToken);
+            return new SimpleToken(TokenKind::SlashToken, Lexer::Position - 1, 1);
         case '*':
-            return new SimpleToken(TokenKind::StarToken);
+            return new SimpleToken(TokenKind::StarToken, Lexer::Position - 1, 1);
         case '=':
             if (Lexer::Peek() == '='){
                 Lexer::Next();
-                return new SimpleToken(TokenKind::EqualityToken);
+                return new SimpleToken(TokenKind::EqualityToken, Lexer::Position - 2, 2);
             }
-            return new SimpleToken(TokenKind::AssignmentToken);
+            return new SimpleToken(TokenKind::AssignmentToken, Lexer::Position - 1, 1);
         case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
             return LexNumber(c);
         case '_':
@@ -85,6 +85,8 @@ IToken* Lexer::LexNumber(char c){
     short decimal_index = 0;
     bool has_point = false;
     bool is_searching = true;
+    unsigned int start = Lexer::Position - 1;
+    unsigned int length = 1;
     while (is_searching){
         char next = Lexer::Peek();
         int next_val = CharToInt(next);
@@ -92,12 +94,14 @@ IToken* Lexer::LexNumber(char c){
             switch (next){
                 case '_':
                     Lexer::Next();
+                    length++;
                     continue;
                 case '.':
                     Lexer::Next();
                     has_point = true;
                     decimal_index = 0;
                     float_value = int_value;
+                    length++;
                     continue;
                 default:
                     is_searching = false;
@@ -106,6 +110,7 @@ IToken* Lexer::LexNumber(char c){
         }
         else{
             Lexer::Next();
+            length++;
             if (has_point){
                 decimal_index++;
                 float_value += next_val / pow(10, decimal_index);
@@ -117,10 +122,10 @@ IToken* Lexer::LexNumber(char c){
         }
     }
     if (has_point){
-        return new FloatToken(float_value);
+        return new FloatToken(float_value, start, length);
     }
     else{
-        return new IntegerToken(int_value);
+        return new IntegerToken(int_value, start, length);
     }
 }
 
@@ -132,6 +137,7 @@ unsigned constexpr const_hash(char const *input) {
 
 IToken* Lexer::LexIdentifierOrKeyword(char c){
     vector<char> charVec(1, c);
+    auto start = Lexer::Position - 1;
     while (true){
         char next = Lexer::Peek();
         if (next == '\0') break;
@@ -145,25 +151,25 @@ IToken* Lexer::LexIdentifierOrKeyword(char c){
     }
     string s = string(charVec.begin(), charVec.end());
     switch (const_hash(s.c_str())){
-        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword);
-        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword);
-        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword);
-        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword);
-        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword);
-        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword);
-        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword);
-        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword);
-        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword);
-        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword);
-        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword);
-        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword);
-        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword);
-        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword);
-        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword);
-        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword);
-        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword);
-        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword);
-        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword);
-        default: return new IdentifierToken(s);
+        case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
+        case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
+        case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
+        case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
+        case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
+        case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
+        case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
+        case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
+        case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
+        case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
+        case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
+        case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
+        case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
+        case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
+        case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
+        case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
+        case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
+        case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
+        case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
+        default: return new IdentifierToken(s, start, s.length());
     }
 }
\ No newline at end of file
diff --git a/src/Parser/LexerTests.cpp b/src/Parser/LexerTests.cpp
index 1658882..82e862b 100644
--- a/src/Parser/LexerTests.cpp
+++ b/src/Parser/LexerTests.cpp
@@ -73,7 +73,7 @@ TEST_CASE( "Lex Whitespace", "[lexer]" ) {
     CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace);
 }
 
-TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
+TEST_CASE( "Lex Basic Digits", "[lexer]" ) {
     Lexer lexer = Lexer("");
     CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
     CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
@@ -255,4 +255,32 @@ TEST_CASE( "Lex identifier", "[lexer]" ) {
     REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
     REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo");
 }
+
+TEST_CASE( "Lex Start Position", "[lexer]" ) { // start index of each token; no downcast needed, the accessor lives on IToken
+    Lexer lexer = Lexer("+ - bar 1234");
+    auto tokens = lexer.Lex();
+    REQUIRE(tokens.size() == 8);                  // '+', ' ', '-', ' ', "bar", ' ', "1234", end of file
+    CHECK(tokens[0] -> GetStartPosition() == 0);  // '+'
+    CHECK(tokens[1] -> GetStartPosition() == 1);  // ' '
+    CHECK(tokens[2] -> GetStartPosition() == 2);  // '-'
+    CHECK(tokens[3] -> GetStartPosition() == 3);  // ' '
+    CHECK(tokens[4] -> GetStartPosition() == 4);  // "bar"
+    CHECK(tokens[5] -> GetStartPosition() == 7);  // ' '
+    CHECK(tokens[6] -> GetStartPosition() == 8);  // "1234"
+    CHECK(tokens[7] -> GetStartPosition() == 12); // end of file
+}
+
+TEST_CASE( "Lex End Position", "[lexer]" ) { // inclusive end index of each token; no downcast needed, the accessor lives on IToken
+    Lexer lexer = Lexer("+ - bar 1234");
+    auto tokens = lexer.Lex();
+    REQUIRE(tokens.size() == 8);                // '+', ' ', '-', ' ', "bar", ' ', "1234", end of file
+    CHECK(tokens[0] -> GetEndPosition() == 0);  // '+'
+    CHECK(tokens[1] -> GetEndPosition() == 1);  // ' '
+    CHECK(tokens[2] -> GetEndPosition() == 2);  // '-'
+    CHECK(tokens[3] -> GetEndPosition() == 3);  // ' '
+    CHECK(tokens[4] -> GetEndPosition() == 6);  // "bar" spans 4..6
+    CHECK(tokens[5] -> GetEndPosition() == 7);  // ' '
+    CHECK(tokens[6] -> GetEndPosition() == 11); // "1234" spans 8..11
+    CHECK(tokens[7] -> GetEndPosition() == 12); // end of file
+}
 #endif
\ No newline at end of file
diff --git a/src/Parser/Token.hpp b/src/Parser/Token.hpp
index dfa1b5a..96a0c28 100644
--- a/src/Parser/Token.hpp
+++ b/src/Parser/Token.hpp
@@ -7,15 +7,30 @@
 using namespace std;
 
 class IToken{
+    // Index of the token's first character in the lexed source.
+    unsigned int Position;
+    // Number of source characters the token spans.
+    unsigned int Length;
 public:
     virtual TokenKind GetKind() = 0;
+
+    // The lexer allocates tokens with `new` and returns them as IToken*, so destruction through the base must be virtual.
+    virtual ~IToken() = default;
+
+    IToken(unsigned int position, unsigned int length) : Position(position), Length(length){}
+
+    // Index of the token's first character.
+    unsigned int GetStartPosition() const{ return Position; }
+
+    // Index of the token's last character (inclusive); only meaningful when Length >= 1.
+    unsigned int GetEndPosition() const{ return Position + Length - 1; }
 };
 
 class SimpleToken : public IToken{
 public:
     TokenKind Kind;
 
-    explicit SimpleToken(TokenKind type){
+    explicit SimpleToken(TokenKind type, unsigned int position, unsigned int length) : IToken(position, length){
         Kind = type;
     }
 
@@ -28,7 +43,7 @@ class IntegerToken : public IToken{
 public:
     long Value;
 
-    explicit IntegerToken(long value){
+    explicit IntegerToken(long value, unsigned int position, unsigned int length) : IToken(position, length){
         Value = value;
     }
 
@@ -41,7 +56,7 @@ class FloatToken : public IToken{
 public:
     double Value;
 
-    explicit FloatToken(double value){
+    explicit FloatToken(double value, unsigned int position, unsigned int length) : IToken(position, length){
         Value = value;
     }
 
@@ -54,7 +69,7 @@ class IdentifierToken : public IToken{
 public:
     string Value;
 
-    explicit IdentifierToken(string value){
+    explicit IdentifierToken(string value, unsigned int position, unsigned int length) : IToken(position, length){
         Value = std::move(value);
     }