Save position and length of tokens
This commit is contained in:
parent
608a579f19
commit
7edd51d5e3
|
@ -34,23 +34,23 @@ char Lexer::Next(){
|
||||||
IToken* Lexer::LexNext(char c){
|
IToken* Lexer::LexNext(char c){
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '\0':
|
case '\0':
|
||||||
return new SimpleToken(TokenKind::EndOfFile);
|
return new SimpleToken(TokenKind::EndOfFile, Lexer::Position - 1, 1);
|
||||||
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
|
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
|
||||||
return new SimpleToken(TokenKind::WhiteSpace);
|
return new SimpleToken(TokenKind::WhiteSpace, Lexer::Position - 1, 1);
|
||||||
case '+':
|
case '+':
|
||||||
return new SimpleToken(TokenKind::PlusToken);
|
return new SimpleToken(TokenKind::PlusToken, Lexer::Position - 1, 1);
|
||||||
case '-':
|
case '-':
|
||||||
return new SimpleToken(TokenKind::MinusToken);
|
return new SimpleToken(TokenKind::MinusToken, Lexer::Position - 1, 1);
|
||||||
case '/':
|
case '/':
|
||||||
return new SimpleToken(TokenKind::SlashToken);
|
return new SimpleToken(TokenKind::SlashToken, Lexer::Position - 1, 1);
|
||||||
case '*':
|
case '*':
|
||||||
return new SimpleToken(TokenKind::StarToken);
|
return new SimpleToken(TokenKind::StarToken, Lexer::Position - 1, 1);
|
||||||
case '=':
|
case '=':
|
||||||
if (Lexer::Peek() == '='){
|
if (Lexer::Peek() == '='){
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
return new SimpleToken(TokenKind::EqualityToken);
|
return new SimpleToken(TokenKind::EqualityToken, Lexer::Position - 2, 2);
|
||||||
}
|
}
|
||||||
return new SimpleToken(TokenKind::AssignmentToken);
|
return new SimpleToken(TokenKind::AssignmentToken, Lexer::Position - 1, 1);
|
||||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
||||||
return LexNumber(c);
|
return LexNumber(c);
|
||||||
case '_':
|
case '_':
|
||||||
|
@ -85,6 +85,8 @@ IToken* Lexer::LexNumber(char c){
|
||||||
short decimal_index = 0;
|
short decimal_index = 0;
|
||||||
bool has_point = false;
|
bool has_point = false;
|
||||||
bool is_searching = true;
|
bool is_searching = true;
|
||||||
|
unsigned int start = Lexer::Position - 1;
|
||||||
|
unsigned int length = 1;
|
||||||
while (is_searching){
|
while (is_searching){
|
||||||
char next = Lexer::Peek();
|
char next = Lexer::Peek();
|
||||||
int next_val = CharToInt(next);
|
int next_val = CharToInt(next);
|
||||||
|
@ -92,12 +94,14 @@ IToken* Lexer::LexNumber(char c){
|
||||||
switch (next){
|
switch (next){
|
||||||
case '_':
|
case '_':
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
|
length++;
|
||||||
continue;
|
continue;
|
||||||
case '.':
|
case '.':
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
has_point = true;
|
has_point = true;
|
||||||
decimal_index = 0;
|
decimal_index = 0;
|
||||||
float_value = int_value;
|
float_value = int_value;
|
||||||
|
length++;
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
is_searching = false;
|
is_searching = false;
|
||||||
|
@ -106,6 +110,7 @@ IToken* Lexer::LexNumber(char c){
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
|
length++;
|
||||||
if (has_point){
|
if (has_point){
|
||||||
decimal_index++;
|
decimal_index++;
|
||||||
float_value += next_val / pow(10, decimal_index);
|
float_value += next_val / pow(10, decimal_index);
|
||||||
|
@ -117,10 +122,10 @@ IToken* Lexer::LexNumber(char c){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (has_point){
|
if (has_point){
|
||||||
return new FloatToken(float_value);
|
return new FloatToken(float_value, start, length);
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
return new IntegerToken(int_value);
|
return new IntegerToken(int_value, start, length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,6 +137,7 @@ unsigned constexpr const_hash(char const *input) {
|
||||||
|
|
||||||
IToken* Lexer::LexIdentifierOrKeyword(char c){
|
IToken* Lexer::LexIdentifierOrKeyword(char c){
|
||||||
vector<char> charVec(1, c);
|
vector<char> charVec(1, c);
|
||||||
|
auto start = Lexer::Position - 1;
|
||||||
while (true){
|
while (true){
|
||||||
char next = Lexer::Peek();
|
char next = Lexer::Peek();
|
||||||
if (next == '\0') break;
|
if (next == '\0') break;
|
||||||
|
@ -145,25 +151,25 @@ IToken* Lexer::LexIdentifierOrKeyword(char c){
|
||||||
}
|
}
|
||||||
string s = string(charVec.begin(), charVec.end());
|
string s = string(charVec.begin(), charVec.end());
|
||||||
switch (const_hash(s.c_str())){
|
switch (const_hash(s.c_str())){
|
||||||
case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword);
|
case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
|
||||||
case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword);
|
case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
|
||||||
case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword);
|
case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
|
||||||
case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword);
|
case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
|
||||||
case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword);
|
case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
|
||||||
case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword);
|
case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
|
||||||
case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword);
|
case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
|
||||||
case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword);
|
case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
|
||||||
case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword);
|
case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
|
||||||
case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword);
|
case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
|
||||||
case const_hash("in"): return new SimpleToken(TokenKind::InKeyword);
|
case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
|
||||||
case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword);
|
case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
|
||||||
case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword);
|
case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
|
||||||
case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword);
|
case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
|
||||||
case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword);
|
case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
|
||||||
case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword);
|
case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
|
||||||
case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword);
|
case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
|
||||||
case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword);
|
case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
|
||||||
case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword);
|
case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
|
||||||
default: return new IdentifierToken(s);
|
default: return new IdentifierToken(s, start, s.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -73,7 +73,7 @@ TEST_CASE( "Lex Whitespace", "[lexer]" ) {
|
||||||
CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace);
|
CHECK(lexer.LexNext('\f') -> GetKind() == TokenKind::WhiteSpace);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE( "Lex Basic Integers", "[lexer]" ) {
|
TEST_CASE( "Lex Basic Digits", "[lexer]" ) {
|
||||||
Lexer lexer = Lexer("");
|
Lexer lexer = Lexer("");
|
||||||
CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
|
CHECK(lexer.LexNext('0') -> GetKind() == TokenKind::Integer);
|
||||||
CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
|
CHECK(lexer.LexNext('1') -> GetKind() == TokenKind::Integer);
|
||||||
|
@ -255,4 +255,32 @@ TEST_CASE( "Lex identifier", "[lexer]" ) {
|
||||||
REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
|
REQUIRE(firstToken -> GetKind() == TokenKind::Identifier);
|
||||||
REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo");
|
REQUIRE(((IdentifierToken*)firstToken) -> Value == "foo");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies the start offset recorded for each token of "+ - bar 1234".
// GetStartPosition() is declared on IToken, so it is called directly on each
// token; casting every token to IdentifierToken* would be an invalid downcast
// for the operator, whitespace, integer and end-of-file tokens.
// NOTE(review): assumes Lex() yields IToken* elements - confirm against Lexer.
TEST_CASE( "Lex Start Position", "[lexer]" ) {
    Lexer lexer = Lexer("+ - bar 1234");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 8);
    CHECK(tokens[0] -> GetStartPosition() == 0);   // '+'
    CHECK(tokens[1] -> GetStartPosition() == 1);   // ' '
    CHECK(tokens[2] -> GetStartPosition() == 2);   // '-'
    CHECK(tokens[3] -> GetStartPosition() == 3);   // ' '
    CHECK(tokens[4] -> GetStartPosition() == 4);   // "bar"
    CHECK(tokens[5] -> GetStartPosition() == 7);   // ' '
    CHECK(tokens[6] -> GetStartPosition() == 8);   // "1234"
    CHECK(tokens[7] -> GetStartPosition() == 12);  // end of file
}
|
||||||
|
|
||||||
|
// Verifies the inclusive end offset recorded for each token of "+ - bar 1234".
// GetEndPosition() is declared on IToken, so it is called directly on each
// token; casting every token to IdentifierToken* would be an invalid downcast
// for the operator, whitespace, integer and end-of-file tokens.
// NOTE(review): assumes Lex() yields IToken* elements - confirm against Lexer.
TEST_CASE( "Lex End Position", "[lexer]" ) {
    Lexer lexer = Lexer("+ - bar 1234");
    auto tokens = lexer.Lex();
    REQUIRE(tokens.size() == 8);
    CHECK(tokens[0] -> GetEndPosition() == 0);   // '+'
    CHECK(tokens[1] -> GetEndPosition() == 1);   // ' '
    CHECK(tokens[2] -> GetEndPosition() == 2);   // '-'
    CHECK(tokens[3] -> GetEndPosition() == 3);   // ' '
    CHECK(tokens[4] -> GetEndPosition() == 6);   // "bar" spans 4..6
    CHECK(tokens[5] -> GetEndPosition() == 7);   // ' '
    CHECK(tokens[6] -> GetEndPosition() == 11);  // "1234" spans 8..11
    CHECK(tokens[7] -> GetEndPosition() == 12);  // end of file
}
|
||||||
#endif
|
#endif
|
|
@ -7,15 +7,30 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class IToken{
|
class IToken{
|
||||||
|
unsigned int Position;
|
||||||
|
unsigned int Length;
|
||||||
public:
|
public:
|
||||||
virtual TokenKind GetKind() = 0;
|
virtual TokenKind GetKind() = 0;
|
||||||
|
|
||||||
|
IToken(unsigned int position, unsigned int length){
|
||||||
|
Position = position;
|
||||||
|
Length = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int GetStartPosition(){
|
||||||
|
return Position;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned int GetEndPosition(){
|
||||||
|
return Position + Length - 1;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class SimpleToken : public IToken{
|
class SimpleToken : public IToken{
|
||||||
public:
|
public:
|
||||||
TokenKind Kind;
|
TokenKind Kind;
|
||||||
|
|
||||||
explicit SimpleToken(TokenKind type){
|
explicit SimpleToken(TokenKind type, unsigned int position, unsigned int length) : IToken(position, length){
|
||||||
Kind = type;
|
Kind = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,7 +43,7 @@ class IntegerToken : public IToken{
|
||||||
public:
|
public:
|
||||||
long Value;
|
long Value;
|
||||||
|
|
||||||
explicit IntegerToken(long value){
|
explicit IntegerToken(long value, unsigned int position, unsigned int length) : IToken(position, length){
|
||||||
Value = value;
|
Value = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,7 +56,7 @@ class FloatToken : public IToken{
|
||||||
public:
|
public:
|
||||||
double Value;
|
double Value;
|
||||||
|
|
||||||
explicit FloatToken(double value){
|
explicit FloatToken(double value, unsigned int position, unsigned int length) : IToken(position, length){
|
||||||
Value = value;
|
Value = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,7 +69,7 @@ class IdentifierToken : public IToken{
|
||||||
public:
|
public:
|
||||||
string Value;
|
string Value;
|
||||||
|
|
||||||
explicit IdentifierToken(string value){
|
explicit IdentifierToken(string value, unsigned int position, unsigned int length) : IToken(position, length){
|
||||||
Value = std::move(value);
|
Value = std::move(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue