Save position and length of tokens

This commit is contained in:
Deukhoofd 2019-05-19 16:11:16 +02:00
parent 608a579f19
commit 7edd51d5e3
No known key found for this signature in database
GPG Key ID: B4C087AC81641654
3 changed files with 84 additions and 35 deletions

View File

@ -34,23 +34,23 @@ char Lexer::Next(){
IToken* Lexer::LexNext(char c){ IToken* Lexer::LexNext(char c){
switch (c) { switch (c) {
case '\0': case '\0':
return new SimpleToken(TokenKind::EndOfFile); return new SimpleToken(TokenKind::EndOfFile, Lexer::Position - 1, 1);
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f': case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
return new SimpleToken(TokenKind::WhiteSpace); return new SimpleToken(TokenKind::WhiteSpace, Lexer::Position - 1, 1);
case '+': case '+':
return new SimpleToken(TokenKind::PlusToken); return new SimpleToken(TokenKind::PlusToken, Lexer::Position - 1, 1);
case '-': case '-':
return new SimpleToken(TokenKind::MinusToken); return new SimpleToken(TokenKind::MinusToken, Lexer::Position - 1, 1);
case '/': case '/':
return new SimpleToken(TokenKind::SlashToken); return new SimpleToken(TokenKind::SlashToken, Lexer::Position - 1, 1);
case '*': case '*':
return new SimpleToken(TokenKind::StarToken); return new SimpleToken(TokenKind::StarToken, Lexer::Position - 1, 1);
case '=': case '=':
if (Lexer::Peek() == '='){ if (Lexer::Peek() == '='){
Lexer::Next(); Lexer::Next();
return new SimpleToken(TokenKind::EqualityToken); return new SimpleToken(TokenKind::EqualityToken, Lexer::Position - 2, 2);
} }
return new SimpleToken(TokenKind::AssignmentToken); return new SimpleToken(TokenKind::AssignmentToken, Lexer::Position - 1, 1);
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
return LexNumber(c); return LexNumber(c);
case '_': case '_':
@ -85,6 +85,8 @@ IToken* Lexer::LexNumber(char c){
short decimal_index = 0; short decimal_index = 0;
bool has_point = false; bool has_point = false;
bool is_searching = true; bool is_searching = true;
unsigned int start = Lexer::Position - 1;
unsigned int length = 1;
while (is_searching){ while (is_searching){
char next = Lexer::Peek(); char next = Lexer::Peek();
int next_val = CharToInt(next); int next_val = CharToInt(next);
@ -92,12 +94,14 @@ IToken* Lexer::LexNumber(char c){
switch (next){ switch (next){
case '_': case '_':
Lexer::Next(); Lexer::Next();
length++;
continue; continue;
case '.': case '.':
Lexer::Next(); Lexer::Next();
has_point = true; has_point = true;
decimal_index = 0; decimal_index = 0;
float_value = int_value; float_value = int_value;
length++;
continue; continue;
default: default:
is_searching = false; is_searching = false;
@ -106,6 +110,7 @@ IToken* Lexer::LexNumber(char c){
} }
else{ else{
Lexer::Next(); Lexer::Next();
length++;
if (has_point){ if (has_point){
decimal_index++; decimal_index++;
float_value += next_val / pow(10, decimal_index); float_value += next_val / pow(10, decimal_index);
@ -117,10 +122,10 @@ IToken* Lexer::LexNumber(char c){
} }
} }
if (has_point){ if (has_point){
return new FloatToken(float_value); return new FloatToken(float_value, start, length);
} }
else{ else{
return new IntegerToken(int_value); return new IntegerToken(int_value, start, length);
} }
} }
@ -132,6 +137,7 @@ unsigned constexpr const_hash(char const *input) {
IToken* Lexer::LexIdentifierOrKeyword(char c){ IToken* Lexer::LexIdentifierOrKeyword(char c){
vector<char> charVec(1, c); vector<char> charVec(1, c);
auto start = Lexer::Position - 1;
while (true){ while (true){
char next = Lexer::Peek(); char next = Lexer::Peek();
if (next == '\0') break; if (next == '\0') break;
@ -145,25 +151,25 @@ IToken* Lexer::LexIdentifierOrKeyword(char c){
} }
string s = string(charVec.begin(), charVec.end()); string s = string(charVec.begin(), charVec.end());
switch (const_hash(s.c_str())){ switch (const_hash(s.c_str())){
case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword); case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword); case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword); case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword); case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword); case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword); case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword); case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword); case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword); case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword); case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
case const_hash("in"): return new SimpleToken(TokenKind::InKeyword); case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword); case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword); case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword); case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword); case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword); case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword); case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword); case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword); case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
default: return new IdentifierToken(s); default: return new IdentifierToken(s, start, s.length());
} }
} }

View File

@ -73,7 +73,7 @@ TEST_CASE( "Lex Whitespace", "[lexer]" ) {
CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace); CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace);
} }
TEST_CASE( "Lex Basic Integers", "[lexer]" ) { TEST_CASE( "Lex Basic Digits", "[lexer]" ) {
Lexer lexer = Lexer(""); Lexer lexer = Lexer("");
CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer); CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer); CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
@ -255,4 +255,32 @@ TEST_CASE( "Lex identifier", "[lexer]" ) {
REQUIRE(firstToken -> GetKind() == TokenKind::Identifier); REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo"); REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo");
} }
// Verifies the start offset recorded on each token lexed from "+ - bar 1234".
// Expected layout (zero-based offsets into the input):
//   [0] '+'  [1] ' '  [2] '-'  [3] ' '  [4] "bar"  [5] ' '  [6] "1234"  [7] end of file
TEST_CASE( "Lex Start Position", "[lexer]" ) {
    Lexer lexer = Lexer("+ - bar 1234");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 8);
    // GetStartPosition() is declared on IToken, so it is called through the
    // stored pointer directly. Casting every token to IdentifierToken* (as was
    // done before) is an invalid downcast for the non-identifier tokens.
    // NOTE(review): assumes Lex() yields pointers to IToken - confirm against the Lexer header.
    CHECK(tokens[0] -> GetStartPosition() == 0);
    CHECK(tokens[1] -> GetStartPosition() == 1);
    CHECK(tokens[2] -> GetStartPosition() == 2);
    CHECK(tokens[3] -> GetStartPosition() == 3);
    CHECK(tokens[4] -> GetStartPosition() == 4);
    CHECK(tokens[5] -> GetStartPosition() == 7);
    CHECK(tokens[6] -> GetStartPosition() == 8);
    CHECK(tokens[7] -> GetStartPosition() == 12);
}
// Verifies the inclusive end offset recorded on each token lexed from "+ - bar 1234".
// Expected layout (zero-based offsets into the input):
//   [0] '+'  [1] ' '  [2] '-'  [3] ' '  [4] "bar" (4..6)  [5] ' '  [6] "1234" (8..11)  [7] end of file
TEST_CASE( "Lex End Position", "[lexer]" ) {
    Lexer lexer = Lexer("+ - bar 1234");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 8);
    // GetEndPosition() is declared on IToken, so it is called through the
    // stored pointer directly. Casting every token to IdentifierToken* (as was
    // done before) is an invalid downcast for the non-identifier tokens.
    // NOTE(review): assumes Lex() yields pointers to IToken - confirm against the Lexer header.
    CHECK(tokens[0] -> GetEndPosition() == 0);
    CHECK(tokens[1] -> GetEndPosition() == 1);
    CHECK(tokens[2] -> GetEndPosition() == 2);
    CHECK(tokens[3] -> GetEndPosition() == 3);
    CHECK(tokens[4] -> GetEndPosition() == 6);
    CHECK(tokens[5] -> GetEndPosition() == 7);
    CHECK(tokens[6] -> GetEndPosition() == 11);
    CHECK(tokens[7] -> GetEndPosition() == 12);
}
#endif #endif

View File

@ -7,15 +7,30 @@
using namespace std; using namespace std;
class IToken{ class IToken{
unsigned int Position;
unsigned int Length;
public: public:
virtual TokenKind GetKind() = 0; virtual TokenKind GetKind() = 0;
IToken(unsigned int position, unsigned int length){
Position = position;
Length = length;
}
unsigned int GetStartPosition(){
return Position;
}
unsigned int GetEndPosition(){
return Position + Length - 1;
}
}; };
class SimpleToken : public IToken{ class SimpleToken : public IToken{
public: public:
TokenKind Kind; TokenKind Kind;
explicit SimpleToken(TokenKind type){ explicit SimpleToken(TokenKind type, unsigned int position, unsigned int length) : IToken(position, length){
Kind = type; Kind = type;
} }
@ -28,7 +43,7 @@ class IntegerToken : public IToken{
public: public:
long Value; long Value;
explicit IntegerToken(long value){ explicit IntegerToken(long value, unsigned int position, unsigned int length) : IToken(position, length){
Value = value; Value = value;
} }
@ -41,7 +56,7 @@ class FloatToken : public IToken{
public: public:
double Value; double Value;
explicit FloatToken(double value){ explicit FloatToken(double value, unsigned int position, unsigned int length) : IToken(position, length){
Value = value; Value = value;
} }
@ -54,7 +69,7 @@ class IdentifierToken : public IToken{
public: public:
string Value; string Value;
explicit IdentifierToken(string value){ explicit IdentifierToken(string value, unsigned int position, unsigned int length) : IToken(position, length){
Value = std::move(value); Value = std::move(value);
} }