2019-05-18 18:35:51 +00:00
|
|
|
#include <utility>
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
#include "Lexer.hpp"
|
|
|
|
|
2019-05-21 11:56:08 +00:00
|
|
|
// Constructs a lexer over a script's source text.
//
// scriptString: the text to tokenize; taken by value and moved into the lexer.
// script:       stored in ScriptData; LexNext logs diagnostics through it.
Lexer::Lexer(string scriptString, class Script* script) {
    // Scanning begins at the first character of the text.
    Position = 0;
    ScriptData = script;
    ScriptString = std::move(scriptString);
}
|
|
|
|
|
|
|
|
// Tokenizes the script from the current position up to and including the
// end-of-file token.
//
// Every token produced by LexNext is kept, whitespace and bad tokens included.
// The returned pointers come from `new` inside the LexNext family and nothing
// in this function releases them.
vector<IToken*> Lexer::Lex() {
    vector<IToken*> result;
    IToken* current = nullptr;
    do {
        current = LexNext(Next());
        result.push_back(current);
        // The EndOfFile token is appended before the loop terminates.
    } while (current->GetKind() != TokenKind::EndOfFile);
    return result;
}
|
|
|
|
|
|
|
|
char Lexer::Peek(){
|
2019-05-21 11:56:08 +00:00
|
|
|
if (Lexer::Position > this -> ScriptString.length())
|
2019-05-18 18:35:51 +00:00
|
|
|
return '\0';
|
2019-05-21 11:56:08 +00:00
|
|
|
return this -> ScriptString[Lexer::Position];
|
2019-05-18 18:35:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
char Lexer::Next(){
|
|
|
|
char next = Peek();
|
|
|
|
Lexer::Position++;
|
|
|
|
return next;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Produces the token that starts with `c`.
//
// `c` is expected to be the character just returned by Next(), so it sits at
// index Position - 1; that index is used as the token start.
//
// Returns a newly allocated token:
//   - '\0'                       -> EndOfFile
//   - a single whitespace char   -> WhiteSpace (one token per character)
//   - + - / *                    -> the matching operator token
//   - '=' / "=="                 -> AssignmentToken / EqualityToken
//   - a decimal digit            -> result of LexNumber
//   - '_' or a letter            -> result of LexIdentifierOrKeyword
//   - anything else              -> BadToken, after logging UnexpectedCharacter
IToken* Lexer::LexNext(char c){
    // Start index of the token; computed before any further input is consumed.
    const auto start = this->Position - 1;

    switch (c) {
        case '\0':
            return new SimpleToken(TokenKind::EndOfFile, start, 1);

        case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
            return new SimpleToken(TokenKind::WhiteSpace, start, 1);

        case '+':
            return new SimpleToken(TokenKind::PlusToken, start, 1);

        case '-':
            return new SimpleToken(TokenKind::MinusToken, start, 1);

        case '/':
            return new SimpleToken(TokenKind::SlashToken, start, 1);

        case '*':
            return new SimpleToken(TokenKind::StarToken, start, 1);

        case '=':
            // A second '=' turns the assignment into an equality operator.
            if (this->Peek() == '='){
                this->Next();
                return new SimpleToken(TokenKind::EqualityToken, start, 2);
            }
            return new SimpleToken(TokenKind::AssignmentToken, start, 1);

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);

        case '_':
            return LexIdentifierOrKeyword(c);

        default:
            // The <cctype> classifiers have undefined behaviour for negative
            // arguments other than EOF, and plain char may be signed, so the
            // byte is converted to unsigned char before classification.
            if (isalpha(static_cast<unsigned char>(c))){
                return LexIdentifierOrKeyword(c);
            }
            this->ScriptData->Diagnostics.LogError(DiagnosticCode::UnexpectedCharacter, start, 1);
            return new SimpleToken(TokenKind::BadToken, start, 1);
    }
}
|
|
|
|
|
|
|
|
// Converts a decimal digit character to its numeric value.
// Returns 0..9 for '0'..'9' and -1 for every other character.
int CharToInt(char c){
    const bool isDigit = (c >= '0') && (c <= '9');
    // The C++ standard guarantees that '0'..'9' are contiguous and ascending,
    // so the distance from '0' is the digit's value.
    return isDigit ? (c - '0') : -1;
}
|
|
|
|
|
|
|
|
// Lexes a numeric literal whose first digit `c` has already been consumed
// (it sits at index Position - 1).
//
// Accepted after the first digit:
//   - decimal digits,
//   - '_' separators, which are consumed and counted in the token length but
//     do not contribute to the value,
//   - a single '.', which switches the literal to floating point.
// Lexing stops, without consuming, at the first character that is none of
// these. That includes a second '.': previously a second point reset the
// fractional accumulator and silently dropped the digits already read
// (e.g. "1.2.3" became 1.3), so it now ends the literal instead.
//
// Returns a newly allocated FloatToken when a decimal point was seen,
// otherwise an IntegerToken; both carry the start index and consumed length.
//
// NOTE(review): the integer part is accumulated in a `long` with no overflow
// check, so extremely long literals overflow — confirm whether a diagnostic
// is wanted.
IToken* Lexer::LexNumber(char c){
    long int_value = CharToInt(c);
    double float_value = 0;
    int decimal_index = 0;   // number of fractional digits read so far
    bool has_point = false;
    const unsigned int start = this->Position - 1;
    unsigned int length = 1;

    while (true){
        const char next = this->Peek();

        if (next == '_'){
            // Digit separator: skipped, but part of the token's extent.
            this->Next();
            length++;
            continue;
        }

        if (next == '.'){
            if (has_point){
                // A literal holds at most one decimal point; leave this one
                // for the next token.
                break;
            }
            this->Next();
            length++;
            has_point = true;
            // Seed the floating value with the integer part read so far.
            float_value = static_cast<double>(int_value);
            continue;
        }

        const int next_val = CharToInt(next);
        if (next_val == -1){
            // Not part of the number (this also covers '\0' at end of input).
            break;
        }

        this->Next();
        length++;
        if (has_point){
            decimal_index++;
            float_value += next_val / pow(10, decimal_index);
        }
        else {
            int_value *= 10;
            int_value += next_val;
        }
    }

    if (has_point){
        return new FloatToken(float_value, start, length);
    }
    return new IntegerToken(int_value, start, length);
}
|
|
|
|
|
2019-05-19 12:26:21 +00:00
|
|
|
// Compile-time-capable string hash over a null-terminated string.
//
// Defined recursively: the empty string hashes to 5381, and otherwise the
// hash is the first character's value plus 33 times the hash of the rest.
// Arithmetic is unsigned, so it wraps on overflow. Written as a single
// return expression so it remains a valid C++11 constexpr function.
unsigned constexpr const_hash(char const *input) {
    return (*input == '\0')
        ? 5381u
        : 33 * const_hash(input + 1) + static_cast<unsigned int>(*input);
}
|
|
|
|
|
2019-05-19 12:26:21 +00:00
|
|
|
// Lexes an identifier or keyword whose first character `c` has already been
// consumed (it sits at index Position - 1).
//
// Consumes every following letter or '_' and stops, without consuming, at the
// first other character or at end of input. If the collected text is exactly
// one of the keywords in the table below, a SimpleToken of the matching kind
// is returned; otherwise an IdentifierToken holding the text. Both are newly
// allocated and carry the start index and the text length.
//
// Keywords are matched by exact string comparison. Matching on a 32-bit hash
// alone would let any identifier whose hash collides with a keyword's be
// tokenized as that keyword.
//
// NOTE(review): digits are not accepted inside identifiers, so "foo1" lexes
// as "foo" followed by a number — confirm whether that is intended.
IToken* Lexer::LexIdentifierOrKeyword(char c){
    struct KeywordEntry {
        const char* text;
        TokenKind kind;
    };
    static const KeywordEntry keywords[] = {
        {"and",      TokenKind::AndKeyword},
        {"break",    TokenKind::BreakKeyword},
        {"do",       TokenKind::DoKeyword},
        {"else",     TokenKind::ElseKeyword},
        {"elseif",   TokenKind::ElseIfKeyword},
        {"end",      TokenKind::EndKeyword},
        {"false",    TokenKind::FalseKeyword},
        {"for",      TokenKind::ForKeyword},
        {"function", TokenKind::FunctionKeyword},
        {"if",       TokenKind::IfKeyword},
        {"in",       TokenKind::InKeyword},
        {"local",    TokenKind::LocalKeyword},
        {"nil",      TokenKind::NilKeyword},
        {"not",      TokenKind::NotKeyword},
        {"or",       TokenKind::OrKeyword},
        {"return",   TokenKind::ReturnKeyword},
        {"then",     TokenKind::ThenKeyword},
        {"true",     TokenKind::TrueKeyword},
        {"while",    TokenKind::WhileKeyword},
    };

    const auto start = this->Position - 1;
    string s(1, c);

    while (true){
        const char next = this->Peek();
        if (next == '\0') break;
        // Convert to unsigned char first: the <cctype> classifiers have
        // undefined behaviour for negative values, and plain char may be signed.
        const bool isWordChar = isalpha(static_cast<unsigned char>(next)) || next == '_';
        if (!isWordChar) break;
        this->Next();
        s.push_back(next);
    }

    for (const auto& keyword : keywords){
        if (s == keyword.text){
            // On an exact match the text length equals the keyword length.
            return new SimpleToken(keyword.kind, start, s.length());
        }
    }
    return new IdentifierToken(s, start, s.length());
}
|