PorygonLang/src/Parser/Lexer.cpp

#include <utility>
#include <cmath>
#include <unordered_map>
#include <sstream>

#include "Lexer.hpp"

/// Creates a lexer over `scriptString`.
///
/// @param scriptString The UTF-16 script text to lex; its size is cached in `_scriptSize`.
/// @param script       Stored in `ScriptData`; the lexer logs diagnostics through it.
Lexer::Lexer(const u16string& scriptString, class Script* script)
        : _scriptString(scriptString)
{
    // Lexing always starts at the first character of the script.
    _position = 0;
    ScriptData = script;
    _scriptSize = scriptString.size();
}


/// Lexes the whole script into a token list.
///
/// Whitespace tokens are deleted as soon as they are produced and never reach the result.
/// Lexing stops after the EndOfFile token, which is always the last element returned.
///
/// @return The heap-allocated tokens in source order.
///         NOTE(review): presumably the caller takes ownership of these pointers - confirm.
vector<const IToken*> Lexer::Lex() {
    vector<const IToken*> result;
    for (;;) {
        IToken* token = LexNext(Next());
        auto kind = token->GetKind();
        if (kind == TokenKind::WhiteSpace) {
            // Whitespace carries no meaning for later stages; drop it right away.
            delete token;
            continue;
        }
        result.push_back(token);
        if (kind == TokenKind::EndOfFile) {
            break;
        }
    }
    return result;
}

char16_t Lexer::Peek(){
    if (Lexer::_position >= this -> _scriptSize)
        return '\0';
    return this -> _scriptString.at(Lexer::_position);
}

char16_t Lexer::Next(){
    char16_t next = Peek();
    Lexer::_position++;
    return next;
}

/// Produces the token that starts with the already-consumed character `c`.
///
/// Single-character tokens are created directly; `=`, `<`, `>` and `~` peek one character ahead
/// to recognise the two-character operators `==`, `<=`, `>=` and `~=`. Digits, quotes and
/// identifier starts are delegated to LexNumber, LexString and LexIdentifierOrKeyword.
/// Any other character logs an UnexpectedCharacter diagnostic and yields a BadToken.
///
/// @param c The character that was just consumed from the script ('\0' signals end of file).
/// @return A newly allocated token.
IToken* Lexer::LexNext(char16_t c){
    // `c` has already been consumed, so the token begins one position back.
    const auto start = this -> _position - 1;
    switch (c) {
        case '\0':
            return new SimpleToken(TokenKind::EndOfFile, start, 1);
        case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
            return new SimpleToken(TokenKind::WhiteSpace, start, 1);
        case '+':
            return new SimpleToken(TokenKind::PlusToken, start, 1);
        case '-':
            return new SimpleToken(TokenKind::MinusToken, start, 1);
        case '/':
            return new SimpleToken(TokenKind::SlashToken, start, 1);
        case '*':
            return new SimpleToken(TokenKind::StarToken, start, 1);
        case '(':
            return new SimpleToken(TokenKind::OpenParenthesis, start, 1);
        case ')':
            return new SimpleToken(TokenKind::CloseParenthesis, start, 1);
        case '[':
            return new SimpleToken(TokenKind::OpenSquareBracket, start, 1);
        case ']':
            return new SimpleToken(TokenKind::CloseSquareBracket, start, 1);
        case '{':
            return new SimpleToken(TokenKind::OpenCurlyBracket, start, 1);
        case '}':
            return new SimpleToken(TokenKind::CloseCurlyBracket, start, 1);
        case ',':
            return new SimpleToken(TokenKind::CommaToken, start, 1);
        case '.':
            return new SimpleToken(TokenKind::PeriodToken, start, 1);
        case '=':
            if (Lexer::Peek() == '='){
                Lexer::Next();
                return new SimpleToken(TokenKind::EqualityToken, start, 2);
            }
            return new SimpleToken(TokenKind::AssignmentToken, start, 1);
        case '<':
            if (Lexer::Peek() == '='){
                Lexer::Next();
                return new SimpleToken(TokenKind::LessEquals, start, 2);
            }
            return new SimpleToken(TokenKind::Less, start, 1);
        case '>':
            if (Lexer::Peek() == '='){
                Lexer::Next();
                return new SimpleToken(TokenKind::GreaterEquals, start, 2);
            }
            return new SimpleToken(TokenKind::Greater, start, 1);
        case '~':
            if (Lexer::Peek() == '='){
                Lexer::Next();
                return new SimpleToken(TokenKind::InequalityToken, start, 2);
            }
            // A lone '~' is not an operator.
            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, start, 1);
            return new SimpleToken(TokenKind::BadToken, start, 1);
        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
            return LexNumber(c);
        case '"':
        case '\'':
            return LexString(c);
        case '_':
            return LexIdentifierOrKeyword();
        default:
            // isalpha() is only defined for values representable as unsigned char; a char16_t
            // code unit above 0xFF would be undefined behaviour, so guard the range first.
            if (c <= 0xFF && isalpha(static_cast<unsigned char>(c))){
                return LexIdentifierOrKeyword();
            }
            this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, start, 1);
            return new SimpleToken(TokenKind::BadToken, start, 1);
    }
}

/// Converts an ASCII decimal digit to its numeric value.
///
/// @param c The character to convert.
/// @return 0-9 for the characters '0'-'9', or -1 for any other character.
int CharToInt(char16_t c){
    const bool isDigit = c >= u'0' && c <= u'9';
    return isDigit ? static_cast<int>(c - u'0') : -1;
}

/// Lexes a numeric literal whose first digit `c` has already been consumed.
///
/// Digits are accumulated into an integer until the first '.' is seen; digits after it are added
/// as decimal fractions. Underscores inside the literal are consumed and ignored (they still count
/// towards the token length). A second '.' ends the literal and is left for the next token;
/// previously it reset the accumulated fraction, so "1.2.3" was lexed as the single value 1.3.
///
/// @param c The first digit of the literal.
/// @return A FloatToken if the literal contains a decimal point, otherwise an IntegerToken.
IToken* Lexer::LexNumber(char16_t c){
    // NOTE(review): int_value is not guarded against overflow for very long digit sequences.
    long int_value = CharToInt(c);
    double float_value = 0;
    short decimal_index = 0;
    bool has_point = false;
    unsigned int start = this -> _position - 1;
    unsigned int length = 1;
    while (true){
        char16_t next = this -> Peek();
        int next_val = CharToInt(next);
        if (next_val != -1){
            this -> Next();
            length++;
            if (has_point){
                decimal_index++;
                float_value += next_val / pow(10, decimal_index);
            }
            else {
                int_value *= 10;
                int_value += next_val;
            }
            continue;
        }
        if (next == '_'){
            // Digit separator: consumed, but contributes nothing to the value.
            this -> Next();
            length++;
            continue;
        }
        if (next == '.' && !has_point){
            // First decimal point: switch from integer to fractional accumulation.
            this -> Next();
            has_point = true;
            float_value = int_value;
            length++;
            continue;
        }
        // Anything else (including a second '.') is not part of this literal; do not consume it.
        break;
    }
    if (has_point){
        return new FloatToken(float_value, start, length);
    }
    return new IntegerToken(int_value, start, length);
}

/// Lexes an identifier or keyword whose first character has already been consumed.
///
/// Consumes every following letter or underscore, then compares the hash of the collected text
/// with the hashes of the language keywords. A match yields the keyword's SimpleToken; anything
/// else becomes an IdentifierToken holding the hashed text.
///
/// @return A newly allocated keyword or identifier token.
IToken * Lexer::LexIdentifierOrKeyword() {
    auto start = this -> _position - 1;
    auto end = start;
    while (true){
        char16_t next = this -> Peek();
        // isalpha() is only defined for values representable as unsigned char; a char16_t code
        // unit above 0xFF would be undefined behaviour, so guard the range before calling it.
        // The end-of-script marker '\0' is neither a letter nor '_' and therefore ends the loop.
        const bool is_letter = next <= 0xFF && isalpha(static_cast<unsigned char>(next));
        if (!is_letter && next != '_'){
            break;
        }
        this -> Next();
        end++;
    }

    u16string s = this -> _scriptString.substr(start, end - start + 1);
    switch (HashedString::ConstHash(s.c_str())){
        case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
        case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
        case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);
        case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);
        case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);
        case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);
        case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);
        case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);
        case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);
        case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);
        case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);
        case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);
        case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);
        case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);
        case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);
        case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);
        case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);
        case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);
        case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);
        default: return new IdentifierToken(HashedString(s), start, s.length());
    }
}

// Maps the character that follows a backslash in a string literal to the character the escape
// sequence stands for (for example 'n' -> newline). Characters absent from this table are not
// valid escapes.
const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)
        {u'0',  u'\0'},
        {u'a',  u'\a'},
        {u'b',  u'\b'},
        {u't',  u'\t'},
        {u'n',  u'\n'},
        {u'v',  u'\v'},
        {u'f',  u'\f'},
        {u'r',  u'\r'},
        {u'"',  u'"'},
        {u'\'', u'\''},
        {u'?',  u'?'},
        {u'\\', u'\\'},
};

/// Lexes a string literal whose opening quote `c` has already been consumed.
///
/// Scans up to the matching unescaped closing quote, then resolves backslash escapes using the
/// ControlCharacters table. An unknown escape logs InvalidStringControlCharacter and keeps the
/// escaped character as-is. If the script ends before the closing quote, UnexpectedCharacter is
/// logged and a BadToken is returned.
///
/// @param c The quote character that opened the string; the same character must close it.
/// @return A StringToken holding the unescaped contents, or a BadToken for an unterminated string.
IToken* Lexer::LexString(char16_t c){
    const char16_t quote = c;
    auto start = this -> _position - 1;
    auto end = start;
    // True when the previous character was a backslash that itself was not escaped. Looking only
    // at the previous character is not enough: in "a\\" the final backslash is escaped, so the
    // quote that follows it must close the string.
    bool escaped = false;
    while (true){
        char16_t next = this -> Peek();
        if (next == '\0') break;
        if (next == quote && !escaped) break;
        this -> Next();
        end++;
        escaped = !escaped && next == '\\';
    }
    auto closeToken = this -> Next();
    if (closeToken != quote){
        this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);
        return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
    }

    // Raw contents between the quotes, escapes still unresolved.
    const u16string raw = this -> _scriptString.substr(start + 1, end - start);
    u16string value;
    value.reserve(raw.size());
    for (size_t i = 0; i < raw.size(); i++){
        char16_t current = raw[i];
        if (current != '\\'){
            value.push_back(current);
            continue;
        }
        // Skip the backslash and translate the character that follows it. The scan above
        // guarantees a backslash is never the last character of a terminated string.
        i++;
        current = raw[i];
        auto found = ControlCharacters.find(current);
        if (found != ControlCharacters.end()) {
            value.push_back(found->second);
        } else{
            this -> ScriptData->Diagnostics->LogError(DiagnosticCode::InvalidStringControlCharacter, start + 1 + static_cast<unsigned int>(i), 1);
            value.push_back(current);
        }
    }
    return new StringToken(std::move(value), start, end - start );
}
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`#include <utility>`
			`#include <cmath>`
Lex Strings 2019-05-22 11:24:28 +00:00			`#include <unordered_map>`
			`#include <sstream>`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00
			`#include "Lexer.hpp"`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`Lexer::Lexer(const u16string& scriptString, class Script* script)`
Always pass the script string around by reference 2019-06-13 15:37:23 +00:00			`: _scriptString(scriptString)`
			`{`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`this->_scriptSize = scriptString.size();`
			`this -> ScriptData = script;`
			`this -> _position = 0;`
			`}`


Make Lexer use constant Tokens 2019-06-13 16:49:38 +00:00			`vector<const IToken*> Lexer::Lex() {`
			`vector<const IToken*> tokens;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`while (true){`
Add support for diagnostics 2019-05-21 11:56:08 +00:00			`IToken* next = this -> LexNext(this -> Next());`
Expanded on diagnostics, make whitespace completely ignored 2019-05-21 13:11:00 +00:00			`auto nextKind = next -> GetKind();`
			`if (nextKind != TokenKind::WhiteSpace)`
			`tokens.push_back(next);`
Fix memory leak related to lexer skipping whitespace. 2019-05-24 17:18:03 +00:00			`else`
			`delete next;`
Expanded on diagnostics, make whitespace completely ignored 2019-05-21 13:11:00 +00:00			`if (nextKind == TokenKind::EndOfFile)`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`break;`
			`}`
			`return tokens;`
			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t Lexer::Peek(){`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`if (Lexer::_position >= this -> _scriptSize)`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`return '\0';`
Always pass the script string around by reference 2019-06-13 15:37:23 +00:00			`return this -> _scriptString.at(Lexer::_position);`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t Lexer::Next(){`
			`char16_t next = Peek();`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`Lexer::_position++;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`return next;`
			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`IToken* Lexer::LexNext(char16_t c){`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`switch (c) {`
			`case '\0':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);`
Adds support for parenthesized expressions 2019-05-21 15:16:53 +00:00			`case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::WhiteSpace, this -> _position - 1, 1);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`case '+':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::PlusToken, this -> _position - 1, 1);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`case '-':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::MinusToken, this -> _position - 1, 1);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`case '/':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::SlashToken, this -> _position - 1, 1);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`case '*':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::StarToken, this -> _position - 1, 1);`
Adds support for parenthesized expressions 2019-05-21 15:16:53 +00:00			`case '(':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::OpenParenthesis, this -> _position - 1, 1);`
Adds support for parenthesized expressions 2019-05-21 15:16:53 +00:00			`case ')':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::CloseParenthesis, this -> _position - 1, 1);`
Added basics for lexing index expressions 2019-06-05 19:01:59 +00:00			`case '[':`
			`return new SimpleToken(TokenKind::OpenSquareBracket, this -> _position - 1, 1);`
			`case ']':`
			`return new SimpleToken(TokenKind::CloseSquareBracket, this -> _position - 1, 1);`
Implements basic numerical tables 2019-06-09 18:15:09 +00:00			`case '{':`
			`return new SimpleToken(TokenKind::OpenCurlyBracket, this -> _position - 1, 1);`
			`case '}':`
			`return new SimpleToken(TokenKind::CloseCurlyBracket, this -> _position - 1, 1);`
Implements parsing function declarations 2019-05-31 13:00:14 +00:00			`case ',':`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::CommaToken, this -> _position - 1, 1);`
Added basics for lexing index expressions 2019-06-05 19:01:59 +00:00			`case '.':`
			`return new SimpleToken(TokenKind::PeriodToken, this -> _position - 1, 1);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`case '=':`
			`if (Lexer::Peek() == '='){`
			`Lexer::Next();`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::EqualityToken, this -> _position - 2, 2);`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`}`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::AssignmentToken, this -> _position - 1, 1);`
Implemented comparison equality operators 2019-06-08 13:38:08 +00:00			`case '<':`
			`if (Lexer::Peek() == '='){`
			`Lexer::Next();`
			`return new SimpleToken(TokenKind::LessEquals, this -> _position - 2, 2);`
			`}`
			`return new SimpleToken(TokenKind::Less, this -> _position - 1, 1);`
			`case '>':`
			`if (Lexer::Peek() == '='){`
			`Lexer::Next();`
			`return new SimpleToken(TokenKind::GreaterEquals, this -> _position - 2, 2);`
			`}`
			`return new SimpleToken(TokenKind::Greater, this -> _position - 1, 1);`
Implements inequality token 2019-05-25 12:17:52 +00:00			`case '~':`
			`if (Lexer::Peek() == '='){`
			`Lexer::Next();`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`return new SimpleToken(TokenKind::InequalityToken, this -> _position - 2, 2);`
Implements inequality token 2019-05-25 12:17:52 +00:00			`}`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);`
			`return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':`
			`return LexNumber(c);`
Lex Strings 2019-05-22 11:24:28 +00:00			`case '"':`
			`return LexString(c);`
			`case '\'':`
			`return LexString(c);`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`case '_':`
Lex Strings 2019-05-22 11:24:28 +00:00			`return LexIdentifierOrKeyword();`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`default:`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`if (isalpha(c)){`
Lex Strings 2019-05-22 11:24:28 +00:00			`return LexIdentifierOrKeyword();`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`}`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`this -> ScriptData -> Diagnostics -> LogError(DiagnosticCode::UnexpectedCharacter, this -> _position - 1, 1);`
			`return new SimpleToken(TokenKind::BadToken, this -> _position - 1, 1);`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`}`
			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`int CharToInt(char16_t c){`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`switch (c){`
			`case '0': return 0;`
			`case '1': return 1;`
			`case '2': return 2;`
			`case '3': return 3;`
			`case '4': return 4;`
			`case '5': return 5;`
			`case '6': return 6;`
			`case '7': return 7;`
			`case '8': return 8;`
			`case '9': return 9;`
			`default: return -1;`
			`}`
			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`IToken* Lexer::LexNumber(char16_t c){`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`long int_value = CharToInt(c);`
			`double float_value = 0;`
			`short decimal_index = 0;`
			`bool has_point = false;`
			`bool is_searching = true;`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`unsigned int start = this -> _position - 1;`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`unsigned int length = 1;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`while (is_searching){`
Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t next = this -> Peek();`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`int next_val = CharToInt(next);`
			`if (next_val == -1){`
			`switch (next){`
Dont consume the character immediately following a number 2019-05-19 10:20:08 +00:00			`case '_':`
Add support for diagnostics 2019-05-21 11:56:08 +00:00			`this -> Next();`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`length++;`
Dont consume the character immediately following a number 2019-05-19 10:20:08 +00:00			`continue;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`case '.':`
Add support for diagnostics 2019-05-21 11:56:08 +00:00			`this -> Next();`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`has_point = true;`
			`decimal_index = 0;`
			`float_value = int_value;`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`length++;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`continue;`
			`default:`
			`is_searching = false;`
			`continue;`
			`}`
			`}`
			`else{`
Add support for diagnostics 2019-05-21 11:56:08 +00:00			`this -> Next();`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`length++;`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`if (has_point){`
			`decimal_index++;`
			`float_value += next_val / pow(10, decimal_index);`
			`}`
			`else {`
			`int_value *= 10;`
			`int_value += next_val;`
			`}`
			`}`
			`}`
			`if (has_point){`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`return new FloatToken(float_value, start, length);`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`}`
			`else{`
Save position and length of tokens 2019-05-19 14:11:16 +00:00			`return new IntegerToken(int_value, start, length);`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`}`
			`}`

Lex Strings 2019-05-22 11:24:28 +00:00			`IToken * Lexer::LexIdentifierOrKeyword() {`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`auto start = this -> _position - 1;`
Improved performance for lexing identifiers/keywords 2019-05-22 10:41:08 +00:00			`auto end = start;`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`while (true){`
Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t next = this -> Peek();`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`if (next == '\0') break;`
			`if (isalpha(next) \|\| next == '_'){`
Add support for diagnostics 2019-05-21 11:56:08 +00:00			`this -> Next();`
Improved performance for lexing identifiers/keywords 2019-05-22 10:41:08 +00:00			`end++;`
Added lexing support for identifiers and keywords 2019-05-19 12:26:21 +00:00			`}`
			`else{`
			`break;`
			`}`
Initial commit, adds very basic Lexing 2019-05-18 18:35:51 +00:00			`}`
Improved performance for lexing identifiers/keywords 2019-05-22 10:41:08 +00:00
Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`u16string s = this -> _scriptString.substr(start, end - start + 1);`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`switch (HashedString::ConstHash(s.c_str())){`
			`case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);`
			`case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);`
			`case HashedString::ConstHash("do"): return new SimpleToken(TokenKind::DoKeyword, start, 2);`
			`case HashedString::ConstHash("else"): return new SimpleToken(TokenKind::ElseKeyword, start, 4);`
			`case HashedString::ConstHash("elseif"): return new SimpleToken(TokenKind::ElseIfKeyword, start, 6);`
			`case HashedString::ConstHash("end"): return new SimpleToken(TokenKind::EndKeyword, start, 3);`
			`case HashedString::ConstHash("false"): return new SimpleToken(TokenKind::FalseKeyword, start, 5);`
			`case HashedString::ConstHash("for"): return new SimpleToken(TokenKind::ForKeyword, start, 3);`
			`case HashedString::ConstHash("function"): return new SimpleToken(TokenKind::FunctionKeyword, start, 8);`
			`case HashedString::ConstHash("if"): return new SimpleToken(TokenKind::IfKeyword, start, 2);`
			`case HashedString::ConstHash("in"): return new SimpleToken(TokenKind::InKeyword, start, 2);`
			`case HashedString::ConstHash("local"): return new SimpleToken(TokenKind::LocalKeyword, start, 5);`
			`case HashedString::ConstHash("nil"): return new SimpleToken(TokenKind::NilKeyword, start, 3);`
			`case HashedString::ConstHash("not"): return new SimpleToken(TokenKind::NotKeyword, start, 3);`
			`case HashedString::ConstHash("or"): return new SimpleToken(TokenKind::OrKeyword, start, 2);`
			`case HashedString::ConstHash("return"): return new SimpleToken(TokenKind::ReturnKeyword, start, 6);`
			`case HashedString::ConstHash("then"): return new SimpleToken(TokenKind::ThenKeyword, start, 4);`
			`case HashedString::ConstHash("true"): return new SimpleToken(TokenKind::TrueKeyword, start, 4);`
			`case HashedString::ConstHash("while"): return new SimpleToken(TokenKind::WhileKeyword, start, 5);`
Make Lexer use constant Tokens 2019-06-13 16:49:38 +00:00			`default: return new IdentifierToken(HashedString(s), start, s.length());`
Add a couple more characters to lex 2019-05-19 10:49:26 +00:00			`}`
Lex Strings 2019-05-22 11:24:28 +00:00			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)`
Lex Strings 2019-05-22 11:24:28 +00:00			`{'0', '\0'},`
			`{'a', '\a'},`
			`{'b', '\b'},`
			`{'t', '\t'},`
			`{'n', '\n'},`
			`{'v', '\v'},`
			`{'f', '\f'},`
			`{'r', '\r'},`
			`{'"', '\"'},`
			`{'\'', '\''},`
			`{'\?', '\?'},`
			`{'\\', '\\'},`
			`};`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`IToken* Lexer::LexString(char16_t c){`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`auto start = this -> _position - 1;`
Lex Strings 2019-05-22 11:24:28 +00:00			`auto end = start;`
Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t last = c;`
Lex Strings 2019-05-22 11:24:28 +00:00			`while (true){`
Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`char16_t next = this -> Peek();`
Lex Strings 2019-05-22 11:24:28 +00:00			`if (next == '\0') break;`
			`if (next == c && last != '\\') break;`
			`this -> Next();`
			`end++;`
			`last = next;`
			`}`
			`auto closeToken = this -> Next();`
			`if (closeToken != c){`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`this -> ScriptData->Diagnostics->LogError(DiagnosticCode::UnexpectedCharacter, this->_position - 1, 1);`
Lex Strings 2019-05-22 11:24:28 +00:00			`return new SimpleToken(TokenKind::BadToken, start, end -start + 1);`
			`}`

Move Lexer to u16string handling, for unicode support 2019-06-15 15:20:27 +00:00			`u16string s = this -> _scriptString.substr(start + 1, end - start);`
			`std::basic_ostringstream<char16_t > stream;`
Lex Strings 2019-05-22 11:24:28 +00:00			`for (int i = 0; i < s.size(); i++){`
			`c = s[i];`
			`if (c == '\\'){`
			`i++;`
			`c = s[i];`
			`if (ControlCharacters.find(c) != ControlCharacters.end()) {`
			`stream << ControlCharacters.at(c);`
			`} else{`
			`this -> ScriptData->Diagnostics->LogError(DiagnosticCode::InvalidStringControlCharacter, start + 1 + i, 1);`
			`stream << c;`
			`}`
			`} else{`
			`stream << c;`
			`}`
			`}`
			`return new StringToken(stream.str(), start, end - start );`
Minor optimizations in lexer 2019-06-05 17:11:56 +00:00			`}`