Minor optimizations in lexer
This commit is contained in:
parent
bda26b0ddf
commit
d49692a17e
|
@ -5,12 +5,21 @@
|
||||||
|
|
||||||
#include "Lexer.hpp"
|
#include "Lexer.hpp"
|
||||||
|
|
||||||
// Lexer constructor, shown as a side-by-side diff: within each row the first
// code line is the pre-commit text and the second is the post-commit text.
// Before: receives the script text by value and moves it into _scriptString.
// After:  receives a pointer to the script text, stores that pointer as-is
//         (no copy is made), caches the text length in _scriptSize and resets
//         the read position to 0.
// NOTE(review): the new version dereferences scriptString without a null
// check, and nothing visible here takes ownership of the string; presumably
// the caller must keep it alive for as long as the Lexer is used - confirm.
Lexer::Lexer(string scriptString, class Script* script) {
|
Lexer::Lexer(string* scriptString, class Script* script) {
|
||||||
this -> _scriptString = std::move(scriptString);
|
this -> _scriptString = scriptString;
|
||||||
|
// Length is read once here; Peek() compares against this cached value.
this->_scriptSize = scriptString->size();
|
||||||
this -> ScriptData = script;
|
this -> ScriptData = script;
|
||||||
this -> Position = 0;
|
this -> _position = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// By-value overload of the Lexer constructor (added by this commit; only the
// post-commit side of the diff has content). It records the script text, its
// length, the owning Script and a starting position of 0.
// NOTE(review): scriptString is a by-value parameter, so it is destroyed when
// this constructor returns. The assignment below stores its address in the
// string* member _scriptString, which leaves the member dangling; the later
// _scriptString->at(...) / ->substr(...) calls in Peek() and the Lex* methods
// would then read a dead object (undefined behaviour). Fixing this needs the
// class to own a copy of the string (or this overload to be removed).
Lexer::Lexer(string scriptString, class Script *script) {
|
||||||
|
// Address of a parameter local to this constructor - see NOTE(review) above.
this -> _scriptString = &scriptString;
|
||||||
|
this->_scriptSize = scriptString.size();
|
||||||
|
this -> ScriptData = script;
|
||||||
|
this -> _position = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
vector<IToken*> Lexer::Lex() {
|
vector<IToken*> Lexer::Lex() {
|
||||||
vector<IToken*> tokens;
|
vector<IToken*> tokens;
|
||||||
while (true){
|
while (true){
|
||||||
|
@ -27,50 +36,50 @@ vector<IToken*> Lexer::Lex() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the character at the current read position without advancing, or
// '\0' once the position has run off the end of the script text.
// Shown as a side-by-side diff (first line of each row = before, second = after).
// Before: the guard used '>' against length(), so a position equal to the
//         length fell through to operator[].
// After:  the guard uses '>=' against the cached _scriptSize, and the
//         character is read through the string pointer with at().
char Lexer::Peek(){
|
char Lexer::Peek(){
|
||||||
if (Lexer::Position > this -> _scriptString.length())
|
if (Lexer::_position >= this -> _scriptSize)
|
||||||
return '\0';
|
return '\0';
|
||||||
return this -> _scriptString[Lexer::Position];
|
// NOTE(review): at() performs its own bounds check, so it duplicates the
// guard above as long as _scriptSize still matches the string's size.
return this -> _scriptString->at(Lexer::_position);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Consumes one character: returns what Peek() reports for the current
// position and then advances the position by one.
// The increment is unconditional, so the position keeps growing even after
// Peek() has started returning '\0' at end of input.
// Side-by-side diff; the only change is the rename Position -> _position.
char Lexer::Next(){
|
char Lexer::Next(){
|
||||||
char next = Peek();
|
char next = Peek();
|
||||||
Lexer::Position++;
|
Lexer::_position++;
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
IToken* Lexer::LexNext(char c){
|
IToken* Lexer::LexNext(char c){
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '\0':
|
case '\0':
|
||||||
return new SimpleToken(TokenKind::EndOfFile, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
|
||||||
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
|
case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
|
||||||
return new SimpleToken(TokenKind::WhiteSpace, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1);
|
||||||
case '+':
|
case '+':
|
||||||
return new SimpleToken(TokenKind::PlusToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1);
|
||||||
case '-':
|
case '-':
|
||||||
return new SimpleToken(TokenKind::MinusToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1);
|
||||||
case '/':
|
case '/':
|
||||||
return new SimpleToken(TokenKind::SlashToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1);
|
||||||
case '*':
|
case '*':
|
||||||
return new SimpleToken(TokenKind::StarToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1);
|
||||||
case '(':
|
case '(':
|
||||||
return new SimpleToken(TokenKind::OpenParenthesis, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1);
|
||||||
case ')':
|
case ')':
|
||||||
return new SimpleToken(TokenKind::CloseParenthesis, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1);
|
||||||
case ',':
|
case ',':
|
||||||
return new SimpleToken(TokenKind::CommaToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1);
|
||||||
case '=':
|
case '=':
|
||||||
if (Lexer::Peek() == '='){
|
if (Lexer::Peek() == '='){
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
return new SimpleToken(TokenKind::EqualityToken, this -> Position - 2, 2);
|
return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2);
|
||||||
}
|
}
|
||||||
return new SimpleToken(TokenKind::AssignmentToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1);
|
||||||
case '~':
|
case '~':
|
||||||
if (Lexer::Peek() == '='){
|
if (Lexer::Peek() == '='){
|
||||||
Lexer::Next();
|
Lexer::Next();
|
||||||
return new SimpleToken(TokenKind::InequalityToken, this -> Position - 2, 2);
|
return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2);
|
||||||
}
|
}
|
||||||
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
|
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
|
||||||
return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
|
||||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
||||||
return LexNumber(c);
|
return LexNumber(c);
|
||||||
case '"':
|
case '"':
|
||||||
|
@ -83,8 +92,8 @@ IToken* Lexer::LexNext(char c){
|
||||||
if (isalpha(c)){
|
if (isalpha(c)){
|
||||||
return LexIdentifierOrKeyword();
|
return LexIdentifierOrKeyword();
|
||||||
}
|
}
|
||||||
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> Position - 1, 1);
|
this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);
|
||||||
return new SimpleToken(TokenKind::BadToken, this -> Position - 1, 1);
|
return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,7 +119,7 @@ IToken* Lexer::LexNumber(char c){
|
||||||
short decimal_index = 0;
|
short decimal_index = 0;
|
||||||
bool has_point = false;
|
bool has_point = false;
|
||||||
bool is_searching = true;
|
bool is_searching = true;
|
||||||
unsigned int start = this -> Position - 1;
|
unsigned int start = this -> _position - 1;
|
||||||
unsigned int length = 1;
|
unsigned int length = 1;
|
||||||
while (is_searching){
|
while (is_searching){
|
||||||
char next = this -> Peek();
|
char next = this -> Peek();
|
||||||
|
@ -154,14 +163,8 @@ IToken* Lexer::LexNumber(char c){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// File-local constexpr string hash (present only on the pre-commit side of
// the diff, i.e. removed by this commit; the keyword switch further down
// calls HashedString::ConstHash on the post-commit side instead).
// Recursive definition over a NUL-terminated string:
//   hash("")      = 5381
//   hash(c + rest) = unsigned(c) + 33 * hash(rest)
// so the recursion depth equals the string length.
unsigned constexpr const_hash(char const *input) {
|
|
||||||
return *input ?
|
|
||||||
static_cast<unsigned int>(*input) + 33 * const_hash(input + 1) :
|
|
||||||
// Seed value returned when the terminating NUL is reached.
5381;
|
|
||||||
}
|
|
||||||
|
|
||||||
IToken * Lexer::LexIdentifierOrKeyword() {
|
IToken * Lexer::LexIdentifierOrKeyword() {
|
||||||
auto start = this -> Position - 1;
|
auto start = this -> _position - 1;
|
||||||
auto end = start;
|
auto end = start;
|
||||||
while (true){
|
while (true){
|
||||||
char next = this -> Peek();
|
char next = this -> Peek();
|
||||||
|
@ -175,27 +178,27 @@ IToken * Lexer::LexIdentifierOrKeyword() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string s = this -> _scriptString.substr(start, end - start + 1);
|
string s = this -> _scriptString->substr(start, end - start + 1);
|
||||||
switch (const_hash(s.c_str())){
|
switch (HashedString::ConstHash(s.c_str())){
|
||||||
case const_hash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
|
case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
|
||||||
case const_hash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
|
case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
|
||||||
case const_hash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
|
case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
|
||||||
case const_hash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
|
case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
|
||||||
case const_hash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
|
case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
|
||||||
case const_hash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
|
case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
|
||||||
case const_hash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
|
case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
|
||||||
case const_hash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
|
case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
|
||||||
case const_hash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
|
case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
|
||||||
case const_hash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
|
case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
|
||||||
case const_hash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
|
case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
|
||||||
case const_hash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
|
case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
|
||||||
case const_hash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
|
case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
|
||||||
case const_hash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
|
case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
|
||||||
case const_hash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
|
case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
|
||||||
case const_hash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
|
case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
|
||||||
case const_hash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
|
case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
|
||||||
case const_hash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
|
case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
|
||||||
case const_hash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
|
case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
|
||||||
default: return new IdentifierToken(s, start, s.length());
|
default: return new IdentifierToken(s, start, s.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -216,7 +219,7 @@ const unordered_map<char, char> ControlCharacters{
|
||||||
};
|
};
|
||||||
|
|
||||||
IToken* Lexer::LexString(char c){
|
IToken* Lexer::LexString(char c){
|
||||||
auto start = this -> Position - 1;
|
auto start = this -> _position - 1;
|
||||||
auto end = start;
|
auto end = start;
|
||||||
char last = c;
|
char last = c;
|
||||||
while (true){
|
while (true){
|
||||||
|
@ -229,11 +232,11 @@ IToken* Lexer::LexString(char c){
|
||||||
}
|
}
|
||||||
auto closeToken = this -> Next();
|
auto closeToken = this -> Next();
|
||||||
if (closeToken != c){
|
if (closeToken != c){
|
||||||
this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->Position - 1, 1);
|
this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);
|
||||||
return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
|
return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
string s = this -> _scriptString.substr(start + 1, end - start);
|
string s = this -> _scriptString->substr(start + 1, end - start);
|
||||||
stringstream stream;
|
stringstream stream;
|
||||||
for (int i = 0; i < s.size(); i++){
|
for (int i = 0; i < s.size(); i++){
|
||||||
c = s[i];
|
c = s[i];
|
||||||
|
@ -252,3 +255,4 @@ IToken* Lexer::LexString(char c){
|
||||||
}
|
}
|
||||||
return new StringToken(stream.str(), start, end - start );
|
return new StringToken(stream.str(), start, end - start );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,11 +8,12 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class Lexer {
|
class Lexer {
|
||||||
string _scriptString;
|
string* _scriptString;
|
||||||
#ifdef TESTS_BUILD
|
#ifdef TESTS_BUILD
|
||||||
public:
|
public:
|
||||||
#endif
|
#endif
|
||||||
unsigned int Position;
|
unsigned int _position;
|
||||||
|
unsigned int _scriptSize;
|
||||||
char Peek();
|
char Peek();
|
||||||
char Next();
|
char Next();
|
||||||
IToken* LexNext(char c);
|
IToken* LexNext(char c);
|
||||||
|
@ -23,6 +24,7 @@ public:
|
||||||
Script* ScriptData;
|
Script* ScriptData;
|
||||||
|
|
||||||
vector<IToken*> Lex();
|
vector<IToken*> Lex();
|
||||||
|
explicit Lexer(string* scriptString, class Script* script);
|
||||||
explicit Lexer(string scriptString, class Script* script);
|
explicit Lexer(string scriptString, class Script* script);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -33,7 +33,7 @@ Script::~Script() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Script::Parse(string script) {
|
void Script::Parse(string script) {
|
||||||
auto lexer = Lexer(std::move(script), this);
|
auto lexer = Lexer(&script, this);
|
||||||
auto lexResult = lexer.Lex();
|
auto lexResult = lexer.Lex();
|
||||||
auto parser = Parser(lexResult, this);
|
auto parser = Parser(lexResult, this);
|
||||||
auto parseResult = parser.Parse();
|
auto parseResult = parser.Parse();
|
||||||
|
|
Loading…
Reference in New Issue