From 7edd51d5e3c8f5a25899ecfd72219bf32ba00bf3 Mon Sep 17 00:00:00 2001 From: Deukhoofd Date: Sun, 19 May 2019 16:11:16 +0200 Subject: [PATCH] Save position and length of tokens --- src/Parser/Lexer.cpp | 66 +++++++++++++++++++++------------------ src/Parser/LexerTests.cpp | 30 +++++++++++++++++- src/Parser/Token.hpp | 23 +++++++++++--- 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/src/Parser/Lexer.cpp b/src/Parser/Lexer.cpp index 279ccc9..907eb13 100644 --- a/src/Parser/Lexer.cpp +++ b/src/Parser/Lexer.cpp @@ -34,23 +34,23 @@ char Lexer::Next(){ IToken* Lexer::LexNext(char c){ switch (c) { case '\0': - return new SimpleToken(TokenKind::EndOfFile); + return new SimpleToken(TokenKind::EndOfFile, Lexer::Position - 1, 1); case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': - return new SimpleToken(TokenKind::WhiteSpace); + return new SimpleToken(TokenKind::WhiteSpace, Lexer::Position - 1, 1); case '+': - return new SimpleToken(TokenKind::PlusToken); + return new SimpleToken(TokenKind::PlusToken, Lexer::Position - 1, 1); case '-': - return new SimpleToken(TokenKind::MinusToken); + return new SimpleToken(TokenKind::MinusToken, Lexer::Position - 1, 1); case '/': - return new SimpleToken(TokenKind::SlashToken); + return new SimpleToken(TokenKind::SlashToken, Lexer::Position - 1, 1); case '*': - return new SimpleToken(TokenKind::StarToken); + return new SimpleToken(TokenKind::StarToken, Lexer::Position - 1, 1); case '=': if (Lexer::Peek() == '='){ Lexer::Next(); - return new SimpleToken(TokenKind::EqualityToken); + return new SimpleToken(TokenKind::EqualityToken, Lexer::Position - 2, 2); } - return new SimpleToken(TokenKind::AssignmentToken); + return new SimpleToken(TokenKind::AssignmentToken, Lexer::Position - 1, 1); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return LexNumber(c); case '_': @@ -85,6 +85,8 @@ IToken* Lexer::LexNumber(char c){ short decimal_index = 0; bool has_point = false; bool is_searching = true; + unsigned int start = Lexer::Position - 1; + unsigned int length = 1; while (is_searching){ char next = Lexer::Peek(); int next_val = CharToInt(next); @@ -92,12 +94,14 @@ IToken* Lexer::LexNumber(char c){ switch (next){ case '_': Lexer::Next(); + length++; continue; case '.': Lexer::Next(); has_point = true; decimal_index = 0; float_value = int_value; + length++; continue; default: is_searching = false; @@ -106,6 +110,7 @@ IToken* Lexer::LexNumber(char c){ } else{ Lexer::Next(); + length++; if (has_point){ decimal_index++; float_value += next_val / pow(10, decimal_index); @@ -117,10 +122,10 @@ IToken* Lexer::LexNumber(char c){ } } if (has_point){ - return new FloatToken(float_value); + return new FloatToken(float_value, start, length); } else{ - return new IntegerToken(int_value); + return new IntegerToken(int_value, start, length); } } @@ -132,6 +137,7 @@ unsigned constexpr const_hash(char const *input) { IToken* Lexer::LexIdentifierOrKeyword(char c){ vector charVec(1, c); + auto start = Lexer::Position - 1; while (true){ char next = Lexer::Peek(); if (next == '\0') break; @@ -145,25 +151,25 @@ IToken* Lexer::LexIdentifierOrKeyword(char c){ } string s = string(charVec.begin(), charVec.end()); switch (const_hash(s.c_str())){ - case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword); - case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword); - case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword); - case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword); - case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword); - case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword); - case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword); - case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword); - case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword); - case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword); - case const_hash("in"): return new SimpleToken(TokenKind::InKeyword); - case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword); - case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword); - case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword); - case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword); - case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword); - case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword); - case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword); - case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword); - default: return new IdentifierToken(s); + case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3); + case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5); + case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2); + case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4); + case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6); + case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3); + case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5); + case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3); + case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8); + case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2); + case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2); + case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5); + case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3); + case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3); + case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2); + case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6); + case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4); + case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4); + case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5); + default: return new IdentifierToken(s, start, s.length()); } } \ No newline at end of file diff --git a/src/Parser/LexerTests.cpp b/src/Parser/LexerTests.cpp index 1658882..82e862b 100644 --- a/src/Parser/LexerTests.cpp +++ b/src/Parser/LexerTests.cpp @@ -73,7 +73,7 @@ TEST_CASE( "Lex Whitespace", "[lexer]" ) { CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace); } -TEST_CASE( "Lex Basic Integers", "[lexer]" ) { +TEST_CASE( "Lex Basic Digits", "[lexer]" ) { Lexer lexer = Lexer(""); CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer); CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer); @@ -255,4 +255,32 @@ TEST_CASE( "Lex identifier", "[lexer]" ) { REQUIRE(firstToken -> GetKind() == TokenKind::Identifier); REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo"); } + +TEST_CASE( "Lex Start Position", "[lexer]" ) { + Lexer lexer = Lexer("+ - bar 1234"); + auto tokens = lexer.Lex(); + REQUIRE(tokens.size() == 8); + CHECK(((IdentifierToken*)tokens[0]) -> GetStartPosition() == 0); + CHECK(((IdentifierToken*)tokens[1]) -> GetStartPosition() == 1); + CHECK(((IdentifierToken*)tokens[2]) -> GetStartPosition() == 2); + CHECK(((IdentifierToken*)tokens[3]) -> GetStartPosition() == 3); + CHECK(((IdentifierToken*)tokens[4]) -> GetStartPosition() == 4); + CHECK(((IdentifierToken*)tokens[5]) -> GetStartPosition() == 7); + CHECK(((IdentifierToken*)tokens[6]) -> GetStartPosition() == 8); + CHECK(((IdentifierToken*)tokens[7]) -> GetStartPosition() == 12); +} + +TEST_CASE( "Lex End Position", "[lexer]" ) { + Lexer lexer = Lexer("+ - bar 1234"); + auto tokens = lexer.Lex(); + REQUIRE(tokens.size() == 8); + CHECK(((IdentifierToken*)tokens[0]) -> GetEndPosition() == 0); + CHECK(((IdentifierToken*)tokens[1]) -> GetEndPosition() == 1); + CHECK(((IdentifierToken*)tokens[2]) -> GetEndPosition() == 2); + CHECK(((IdentifierToken*)tokens[3]) -> GetEndPosition() == 3); + CHECK(((IdentifierToken*)tokens[4]) -> GetEndPosition() == 6); + CHECK(((IdentifierToken*)tokens[5]) -> GetEndPosition() == 7); + CHECK(((IdentifierToken*)tokens[6]) -> GetEndPosition() == 11); + CHECK(((IdentifierToken*)tokens[7]) -> GetEndPosition() == 12); +} #endif \ No newline at end of file diff --git a/src/Parser/Token.hpp b/src/Parser/Token.hpp index dfa1b5a..96a0c28 100644 --- a/src/Parser/Token.hpp +++ b/src/Parser/Token.hpp @@ -7,15 +7,30 @@ using namespace std; class IToken{ + unsigned int Position; + unsigned int Length; public: virtual TokenKind GetKind() = 0; + + IToken(unsigned int position, unsigned int length){ + Position = position; + Length = length; + } + + unsigned int GetStartPosition(){ + return Position; + } + + unsigned int GetEndPosition(){ + return Position + Length - 1; + } }; class SimpleToken : public IToken{ public: TokenKind Kind; - explicit SimpleToken(TokenKind type){ + explicit SimpleToken(TokenKind type, unsigned int position, unsigned int length) : IToken(position, length){ Kind = type; } @@ -28,7 +43,7 @@ class IntegerToken : public IToken{ public: long Value; - explicit IntegerToken(long value){ + explicit IntegerToken(long value, unsigned int position, unsigned int length) : IToken(position, length){ Value = value; } @@ -41,7 +56,7 @@ class FloatToken : public IToken{ public: double Value; - explicit FloatToken(double value){ + explicit FloatToken(double value, unsigned int position, unsigned int length) : IToken(position, length){ Value = value; } @@ -54,7 +69,7 @@ class IdentifierToken : public IToken{ public: string Value; - explicit IdentifierToken(string value){ + explicit IdentifierToken(string value, unsigned int position, unsigned int length) : IToken(position, length){ Value = std::move(value); }