Move Lexer to u16string handling for Unicode support
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2019-06-15 17:20:27 +02:00
parent f73bd2003c
commit 3dc67ec8a0
21 changed files with 189 additions and 145 deletions

View File

@@ -5,7 +5,7 @@
#include "Lexer.hpp"
Lexer::Lexer(const string& scriptString, class Script* script)
Lexer::Lexer(const u16string& scriptString, class Script* script)
: _scriptString(scriptString)
{
this->_scriptSize = scriptString.size();
@@ -29,19 +29,19 @@ vector<const IToken*> Lexer::Lex() {
return tokens;
}
// Returns the character at the current lexer position without advancing.
// Yields '\0' when the position is at or beyond the recorded script size.
// NOTE(review): this listing appears to be a rendered diff — the `char`
// signature looks like the pre-change line and `char16_t` its replacement.
char Lexer::Peek(){
char16_t Lexer::Peek(){
// Guard first so the at() call below is never reached with an index past the end.
if (Lexer::_position >= this -> _scriptSize)
return '\0';
return this -> _scriptString.at(Lexer::_position);
}
// Returns the character at the current lexer position (as reported by Peek())
// and then advances the position by one.
// NOTE(review): this listing appears to be a rendered diff — the `char`
// lines look like the pre-change versions and the `char16_t` lines their
// replacements.
char Lexer::Next(){
char next = Peek();
char16_t Lexer::Next(){
char16_t next = Peek();
// The position is incremented unconditionally, including when Peek() returned '\0'.
Lexer::_position++;
return next;
}
IToken* Lexer::LexNext(char c){
IToken* Lexer::LexNext(char16_t c){
switch (c) {
case '\0':
return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
@@ -113,7 +113,7 @@ IToken* Lexer::LexNext(char c){
}
}
int CharToInt(char c){
int CharToInt(char16_t c){
switch (c){
case '0': return 0;
case '1': return 1;
@@ -129,7 +129,7 @@ int CharToInt(char c){
}
}
IToken* Lexer::LexNumber(char c){
IToken* Lexer::LexNumber(char16_t c){
long int_value = CharToInt(c);
double float_value = 0;
short decimal_index = 0;
@@ -138,7 +138,7 @@ IToken* Lexer::LexNumber(char c){
unsigned int start = this -> _position - 1;
unsigned int length = 1;
while (is_searching){
char next = this -> Peek();
char16_t next = this -> Peek();
int next_val = CharToInt(next);
if (next_val == -1){
switch (next){
@@ -183,7 +183,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
auto start = this -> _position - 1;
auto end = start;
while (true){
char next = this -> Peek();
char16_t next = this -> Peek();
if (next == '\0') break;
if (isalpha(next) || next == '_'){
this -> Next();
@@ -194,7 +194,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
}
}
string s = this -> _scriptString.substr(start, end - start + 1);
u16string s = this -> _scriptString.substr(start, end - start + 1);
switch (HashedString::ConstHash(s.c_str())){
case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
@@ -219,7 +219,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
}
}
const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)
{'0', '\0'},
{'a', '\a'},
{'b', '\b'},
@@ -234,12 +234,12 @@ const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
{'\\', '\\'},
};
IToken* Lexer::LexString(char c){
IToken* Lexer::LexString(char16_t c){
auto start = this -> _position - 1;
auto end = start;
char last = c;
char16_t last = c;
while (true){
char next = this -> Peek();
char16_t next = this -> Peek();
if (next == '\0') break;
if (next == c && last != '\\') break;
this -> Next();
@@ -252,8 +252,8 @@ IToken* Lexer::LexString(char c){
return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
}
string s = this -> _scriptString.substr(start + 1, end - start);
stringstream stream;
u16string s = this -> _scriptString.substr(start + 1, end - start);
std::basic_ostringstream<char16_t > stream;
for (int i = 0; i < s.size(); i++){
c = s[i];
if (c == '\\'){

View File

@@ -8,23 +8,23 @@
using namespace std;
// Lexer over a script string; Lex() returns the resulting tokens as a
// vector of const IToken pointers.
// NOTE(review): this listing appears to be a rendered diff, so each changed
// declaration shows up twice — the `string`/`char` form followed by what
// looks like its `u16string`/`char16_t` replacement.
class Lexer {
// Stored as a reference, not a copy: the string passed to the constructor
// must outlive this Lexer.
const string& _scriptString;
const u16string& _scriptString;
// Members below are private by default; defining TESTS_BUILD makes them public.
#ifdef TESTS_BUILD
public:
#endif
// Index into _scriptString of the next character to be read.
unsigned int _position;
// Length of the script string.
unsigned int _scriptSize;
// Peek() reads the current character; Next() reads it and advances.
char Peek();
char Next();
// Per-token helpers; each returns a newly produced IToken.
IToken* LexNext(char c);
IToken* LexNumber(char c);
char16_t Peek();
char16_t Next();
IToken* LexNext(char16_t c);
IToken* LexNumber(char16_t c);
IToken* LexIdentifierOrKeyword();
IToken* LexString(char c);
IToken* LexString(char16_t c);
public:
Script* ScriptData;
// Lexes the script and returns the tokens.
vector<const IToken*> Lex();
// explicit: no implicit conversion from a string to a Lexer.
explicit Lexer(const string& scriptString, class Script* script);
explicit Lexer(const u16string& scriptString, class Script* script);
};

View File

@@ -100,7 +100,7 @@ public:
};
class LiteralStringExpression : public ParsedExpression{
const string _value;
const u16string _value;
public:
const ParsedExpressionKind GetKind() const final{
return ParsedExpressionKind::LiteralString;
@@ -111,7 +111,7 @@ public:
{
}
// Returns a const reference to the stored literal string value (no copy is made).
// NOTE(review): this listing appears to be a rendered diff — the `string`
// signature looks like the pre-change line and `u16string` its replacement.
const string& GetValue() const{
const u16string& GetValue() const{
return _value;
}
};

View File

@@ -91,10 +91,10 @@ public:
};
class StringToken : public IToken{
const string _value;
const u16string _value;
public:
explicit StringToken(string value, unsigned int position, unsigned int length)
explicit StringToken(u16string value, unsigned int position, unsigned int length)
: IToken(position, length),
_value(std::move(value))
{
@@ -104,7 +104,7 @@ public:
return TokenKind::String;
}
// Returns a const reference to the token's stored string value (no copy is made).
// NOTE(review): this listing appears to be a rendered diff — the `string`
// signature looks like the pre-change line and `u16string` its replacement.
const string& GetValue() const{
const u16string& GetValue() const{
return _value;
}
};