Move Lexer to u16string handling for Unicode support
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2019-06-15 17:20:27 +02:00
parent f73bd2003c
commit 3dc67ec8a0
21 changed files with 189 additions and 145 deletions

View File

@@ -104,9 +104,9 @@ public:
};
class BoundLiteralStringExpression : public BoundExpression{
const string _value;
const u16string _value;
public:
BoundLiteralStringExpression(string value, unsigned int start, unsigned int length)
BoundLiteralStringExpression(u16string value, unsigned int start, unsigned int length)
: BoundExpression(start, length, make_shared<StringScriptType>(true, HashedString::ConstHash(value.c_str()))),
_value(value)
{
@@ -116,7 +116,7 @@ public:
return BoundExpressionKind ::LiteralString;
}
// Returns the literal's text. The return type is a value, not a reference,
// so every call hands back a copy of _value.
const string GetValue() const{
const u16string GetValue() const{
return _value;
}
};

View File

@@ -96,7 +96,7 @@ shared_ptr<BooleanEvalValue> Evaluator::EvaluateBooleanBinary(const BoundBinaryE
shared_ptr<StringEvalValue> Evaluator::EvaluateStringBinary(const BoundBinaryExpression* expression){
if (expression->GetOperation() != BoundBinaryOperation::Concatenation)
throw;
std::ostringstream strs;
std::basic_ostringstream<char16_t > strs;
auto left = this -> EvaluateStringExpression(expression->GetLeft());
strs << *left->EvaluateString();
auto right = this -> EvaluateExpression(expression->GetRight());

View File

@@ -20,7 +20,7 @@ extern "C" {
return v->EvaluateBool();
}
// C-linkage accessor for an EvalValue's string content.
// Returns the c_str() buffer of the string object that v->EvaluateString()
// points to; no copy is made here, so the pointer is only usable while that
// string object is alive and unmodified.
const char* EvaluateEvalValueString(EvalValue* v){
const char16_t * EvaluateEvalValueString(EvalValue* v){
return v->EvaluateString() -> c_str();
}
@@ -36,7 +36,7 @@ extern "C" {
return new BooleanEvalValue(b);
}
// C-linkage factory: allocates a StringEvalValue from the NUL-terminated
// string s with `new` and returns the raw pointer. Nothing in this function
// releases the allocation.
EvalValue* CreateStringEvalValue(const char* s){
EvalValue* CreateStringEvalValue(const char16_t * s){
return new StringEvalValue(s);
}
}
@@ -47,11 +47,12 @@ extern "C" {
// Integration test: a script that consists of the string literal "foo bar"
// must produce no diagnostics errors and evaluate to that same text when read
// back through EvaluateEvalValueString.
TEST_CASE( "Evaluate String", "[integration]" ) {
auto script = Script::Create("\"foo bar\"");
auto script = Script::Create(u"\"foo bar\"");
REQUIRE(!script->Diagnostics -> HasErrors());
script->Evaluate();
auto lastValue = script->GetLastValue();
REQUIRE(std::strcmp(EvaluateEvalValueString(lastValue), "foo bar") == 0);
// Copy the returned UTF-16 buffer into a u16string so it can be compared with ==.
auto s = u16string(EvaluateEvalValueString(lastValue));
REQUIRE(s == u"foo bar");
delete script;
}

View File

@@ -31,7 +31,7 @@ public:
// Default implementation: always throws EvaluationException.
// Declared virtual, so presumably value types that can be read as a bool
// override it (those overrides are not visible here).
virtual bool EvaluateBool() const{
throw EvaluationException("Can't evaluate this EvalValue as bool.");
}
// Default implementation: always throws EvaluationException.
// Declared virtual; a value type that holds a string is expected to override
// this and return a pointer to it instead.
virtual const std::string* EvaluateString() const {
virtual const std::u16string* EvaluateString() const {
throw EvaluationException("Can't evaluate this EvalValue as string.");
}

View File

@@ -9,10 +9,10 @@
using namespace std;
class StringEvalValue : public EvalValue{
string _value;
u16string _value;
size_t _hash;
public:
// Takes the string by value and moves it into _value, then caches the hash of
// the stored characters (HashedString::ConstHash over _value.c_str()) in _hash.
explicit StringEvalValue(string s){
explicit StringEvalValue(u16string s){
_value = move(s);
_hash = HashedString::ConstHash (_value.c_str());
}
@@ -27,7 +27,7 @@ public:
return this->_hash == b->GetHashCode();
};
// Returns a pointer to this object's own _value member (no copy); the pointer
// therefore cannot outlive this StringEvalValue.
const string* EvaluateString() const final{
const u16string* EvaluateString() const final{
return &_value;
}
@@ -38,7 +38,7 @@ public:
// Returns a new StringEvalValue holding the single element of _value selected
// by val, where val is read through EvaluateInteger().
// NOTE(review): operator[] is used with no bounds check visible in this
// function, so an index of 0 or one past the length is not guarded here.
// NOTE(review): with u16string the result is one UTF-16 code unit, which may
// be half of a surrogate pair for characters outside the BMP.
shared_ptr<EvalValue> IndexValue(EvalValue* val) final{
// Porygon indices are 1-based; subtract one to get the 0-based offset into _value.
auto l = val->EvaluateInteger() - 1;
return make_shared<StringEvalValue>(string(1, _value[l]));
return make_shared<StringEvalValue>(u16string(1, _value[l]));
}
std::size_t GetHashCode() final{

View File

@@ -5,7 +5,7 @@
#include "Lexer.hpp"
Lexer::Lexer(const string& scriptString, class Script* script)
Lexer::Lexer(const u16string& scriptString, class Script* script)
: _scriptString(scriptString)
{
this->_scriptSize = scriptString.size();
@@ -29,19 +29,19 @@ vector<const IToken*> Lexer::Lex() {
return tokens;
}
// Returns the character at the current position without advancing.
// Once the position is at or past _scriptSize, returns '\0' instead of
// reading from the script string.
char Lexer::Peek(){
char16_t Lexer::Peek(){
if (Lexer::_position >= this -> _scriptSize)
return '\0';
return this -> _scriptString.at(Lexer::_position);
}
// Returns the character at the current position (as reported by Peek) and
// then advances the position by one. The increment is unconditional, so the
// position keeps growing even when Peek() yielded '\0' at end of input.
char Lexer::Next(){
char next = Peek();
char16_t Lexer::Next(){
char16_t next = Peek();
Lexer::_position++;
return next;
}
IToken* Lexer::LexNext(char c){
IToken* Lexer::LexNext(char16_t c){
switch (c) {
case '\0':
return new SimpleToken(TokenKind::EndOfFile, this -> _position - 1, 1);
@@ -113,7 +113,7 @@ IToken* Lexer::LexNext(char c){
}
}
int CharToInt(char c){
int CharToInt(char16_t c){
switch (c){
case '0': return 0;
case '1': return 1;
@@ -129,7 +129,7 @@ int CharToInt(char c){
}
}
IToken* Lexer::LexNumber(char c){
IToken* Lexer::LexNumber(char16_t c){
long int_value = CharToInt(c);
double float_value = 0;
short decimal_index = 0;
@@ -138,7 +138,7 @@ IToken* Lexer::LexNumber(char c){
unsigned int start = this -> _position - 1;
unsigned int length = 1;
while (is_searching){
char next = this -> Peek();
char16_t next = this -> Peek();
int next_val = CharToInt(next);
if (next_val == -1){
switch (next){
@@ -183,7 +183,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
auto start = this -> _position - 1;
auto end = start;
while (true){
char next = this -> Peek();
char16_t next = this -> Peek();
if (next == '\0') break;
if (isalpha(next) || next == '_'){
this -> Next();
@@ -194,7 +194,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
}
}
string s = this -> _scriptString.substr(start, end - start + 1);
u16string s = this -> _scriptString.substr(start, end - start + 1);
switch (HashedString::ConstHash(s.c_str())){
case HashedString::ConstHash("and"): return new SimpleToken(TokenKind::AndKeyword, start, 3);
case HashedString::ConstHash("break"): return new SimpleToken(TokenKind::BreakKeyword, start, 5);
@@ -219,7 +219,7 @@ IToken * Lexer::LexIdentifierOrKeyword() {
}
}
const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
const unordered_map<char16_t, char16_t> ControlCharacters{ // NOLINT(cert-err58-cpp)
{'0', '\0'},
{'a', '\a'},
{'b', '\b'},
@@ -234,12 +234,12 @@ const unordered_map<char, char> ControlCharacters{ // NOLINT(cert-err58-cpp)
{'\\', '\\'},
};
IToken* Lexer::LexString(char c){
IToken* Lexer::LexString(char16_t c){
auto start = this -> _position - 1;
auto end = start;
char last = c;
char16_t last = c;
while (true){
char next = this -> Peek();
char16_t next = this -> Peek();
if (next == '\0') break;
if (next == c && last != '\\') break;
this -> Next();
@@ -252,8 +252,8 @@ IToken* Lexer::LexString(char c){
return new SimpleToken(TokenKind::BadToken, start, end -start + 1);
}
string s = this -> _scriptString.substr(start + 1, end - start);
stringstream stream;
u16string s = this -> _scriptString.substr(start + 1, end - start);
std::basic_ostringstream<char16_t > stream;
for (int i = 0; i < s.size(); i++){
c = s[i];
if (c == '\\'){

View File

@@ -8,23 +8,23 @@
using namespace std;
// Turns a script string into a vector of tokens (see Lex()).
class Lexer {
// The script text is held by reference, not copied: the string handed to the
// constructor has to stay alive for as long as this Lexer is used.
const string& _scriptString;
const u16string& _scriptString;
// In test builds the members below become public so tests can reach them.
#ifdef TESTS_BUILD
public:
#endif
// Current read offset into _scriptString.
unsigned int _position;
// Length of _scriptString, captured in the constructor.
unsigned int _scriptSize;
char Peek();
char Next();
IToken* LexNext(char c);
IToken* LexNumber(char c);
char16_t Peek();
char16_t Next();
IToken* LexNext(char16_t c);
IToken* LexNumber(char16_t c);
IToken* LexIdentifierOrKeyword();
IToken* LexString(char c);
IToken* LexString(char16_t c);
public:
Script* ScriptData;
vector<const IToken*> Lex();
explicit Lexer(const string& scriptString, class Script* script);
explicit Lexer(const u16string& scriptString, class Script* script);
};

View File

@@ -100,7 +100,7 @@ public:
};
class LiteralStringExpression : public ParsedExpression{
const string _value;
const u16string _value;
public:
const ParsedExpressionKind GetKind() const final{
return ParsedExpressionKind::LiteralString;
@@ -111,7 +111,7 @@ public:
{
}
// Returns a const reference to the stored literal text (no copy).
const string& GetValue() const{
const u16string& GetValue() const{
return _value;
}
};

View File

@@ -91,10 +91,10 @@ public:
};
class StringToken : public IToken{
const string _value;
const u16string _value;
public:
explicit StringToken(string value, unsigned int position, unsigned int length)
explicit StringToken(u16string value, unsigned int position, unsigned int length)
: IToken(position, length),
_value(std::move(value))
{
@@ -104,7 +104,7 @@ public:
return TokenKind::String;
}
// Returns a const reference to the token's string content (no copy).
const string& GetValue() const{
const u16string& GetValue() const{
return _value;
}
};

View File

@@ -1,18 +1,29 @@
#include <utility>
#include <vector>
#include <iterator>
#include <locale>
#include <unordered_map>
#include <codecvt>
#include "Script.hpp"
#include "Parser/Lexer.hpp"
#include "Parser/Parser.hpp"
#include "Binder/Binder.hpp"
// Factory: allocates a Script with `new`, runs Parse() on the given source
// text and returns the raw pointer. This function does not free the Script.
Script* Script::Create(const string& script) {
Script* Script::Create(const u16string& script) {
auto s = new Script();
s -> Parse(script);
return s;
}
/// Converts a UTF-8 encoded byte string into its UTF-16 representation.
/// Conversion goes through std::wstring_convert, which throws
/// std::range_error when the input is not valid UTF-8.
std::u16string To_UTF16(const string &s)
{
    using Utf8ToUtf16 = std::codecvt_utf8_utf16<char16_t>;
    std::wstring_convert<Utf8ToUtf16, char16_t> converter;
    // Convert the whole byte range, including any embedded NUL bytes.
    const char *first = s.data();
    const char *last = first + s.size();
    return converter.from_bytes(first, last);
}
// Convenience overload for UTF-8 input: converts the source text to UTF-16
// with To_UTF16 and forwards to the u16string overload of Create.
Script *Script::Create(const string &script) {
    const std::u16string utf16Script = To_UTF16(script);
    return Script::Create(utf16Script);
}
Script::Script() {
Diagnostics = new DiagnosticsHolder();
_evaluator = new Evaluator(this);
@@ -32,7 +43,7 @@ Script::~Script() {
delete this->_scriptVariables;
}
void Script::Parse(const string& script) {
void Script::Parse(const u16string& script) {
auto lexer = Lexer(script, this);
auto lexResult = lexer.Lex();
auto parser = Parser(lexResult, this);
@@ -54,11 +65,11 @@ void Script::Parse(const string& script) {
delete parseResult;
}
// Looks a script variable up by the hash of its name (HashedString(key).GetHash())
// and returns the raw pointer obtained from the stored entry via .get().
// The lookup uses at(); assuming _scriptVariables is a standard map type, an
// unknown key throws std::out_of_range rather than returning null — TODO confirm.
EvalValue *Script::GetVariable(const string &key) {
EvalValue *Script::GetVariable(const u16string &key) {
return _scriptVariables -> at(HashedString(key).GetHash()).get();
}
// Returns true when _scriptVariables contains an entry for the hash of key.
bool Script::HasVariable(const string &key) {
bool Script::HasVariable(const u16string &key) {
auto f = _scriptVariables->find(HashedString(key).GetHash());
return f != _scriptVariables->end();
}
@@ -67,18 +78,19 @@ EvalValue *Script::GetLastValue() {
return _evaluator->GetLastValue();
}
// Returns true when _scriptVariables contains an entry for the hash of key
// AND that entry's GetTypeClass() is TypeClass::Function.
bool Script::HasFunction(const string &key) {
bool Script::HasFunction(const u16string &key) {
auto f = _scriptVariables->find(HashedString(key).GetHash());
return f != _scriptVariables->end() && f.operator->()->second->GetTypeClass() == TypeClass ::Function;
}
// Fetches the variable named key and passes it, together with the given
// arguments, to the evaluator's EvaluateFunction; returns that result.
// NOTE(review): the C-style cast to ScriptFunctionEvalValue* is unchecked in
// this function; HasFunction(key) is the visible way to verify the type first.
shared_ptr<EvalValue> Script::CallFunction(const string &key, const vector<EvalValue *>& variables) {
shared_ptr<EvalValue> Script::CallFunction(const u16string &key, const vector<EvalValue *>& variables) {
auto var = (ScriptFunctionEvalValue*)GetVariable(key);
return this->_evaluator->EvaluateFunction(var, variables);
}
extern "C" {
// C-linkage wrapper around Script::Create for a NUL-terminated source string.
// Returns the raw Script pointer produced by Script::Create.
Script* CreateScript(char * s){
Script* CreateScript(char16_t * s){
return Script::Create(s);
}
@@ -90,19 +102,19 @@ extern "C" {
return script->GetLastValue();
}
// C-linkage wrapper: forwards to script->HasVariable(key). The raw key pointer
// is implicitly converted to the string type that member function takes.
bool HasVariable(Script* script, const char* key){
bool HasVariable(Script* script, const char16_t* key){
return script->HasVariable(key);
}
// C-linkage wrapper: forwards to script->GetVariable(key) and returns the raw
// EvalValue pointer it yields.
EvalValue* GetVariable(Script* script, const char* key){
EvalValue* GetVariable(Script* script, const char16_t* key){
return script->GetVariable(key);
}
// C-linkage wrapper: forwards to script->HasFunction(key).
bool HasFunction(Script* script, const char* key){
bool HasFunction(Script* script, const char16_t* key){
return script->HasFunction(key);
}
// C-linkage wrapper: copies the parameterCount pointers from parameters into a
// vector and calls script->CallFunction(key, ...) with it.
// NOTE(review): .get() is taken from the temporary shared_ptr returned by
// CallFunction; that temporary is destroyed at the end of the return
// statement, so unless something else still owns the value the returned raw
// pointer is dangling — confirm ownership.
EvalValue* CallFunction(Script* script, const char* key, EvalValue* parameters[], int parameterCount){
EvalValue* CallFunction(Script* script, const char16_t* key, EvalValue* parameters[], int parameterCount){
std::vector<EvalValue*> v(parameters, parameters + parameterCount);
return script->CallFunction(key, v).get();
}

View File

@@ -23,8 +23,9 @@ class Script {
shared_ptr<ScriptType> _returnType;
explicit Script();
void Parse(const string& script);
void Parse(const u16string& script);
public:
static Script* Create(const u16string& script);
static Script* Create(const string& script);
DiagnosticsHolder* Diagnostics;
@@ -42,11 +43,11 @@ public:
EvalValue* GetLastValue();
EvalValue* GetVariable(const string& key);
bool HasVariable(const string& key);
EvalValue* GetVariable(const u16string& key);
bool HasVariable(const u16string& key);
shared_ptr<EvalValue> CallFunction(const string& key, const vector<EvalValue*>& variables);
bool HasFunction(const string& key);
shared_ptr<EvalValue> CallFunction(const u16string& key, const vector<EvalValue*>& variables);
bool HasFunction(const u16string& key);
};

View File

@@ -7,12 +7,22 @@
class HashedString{
const uint32_t _hash;
public:
// Constructors: each stores ConstHash of the given text in _hash.
// From a string object (hashes its c_str() buffer).
explicit HashedString(const std::string& s) : _hash(ConstHash(s.c_str())){
explicit HashedString(const std::u16string& s) : _hash(ConstHash(s.c_str())){
}
// From a NUL-terminated UTF-16 string.
explicit HashedString(char16_t const *input) : _hash(ConstHash(input)){
}
// From a NUL-terminated narrow string.
explicit HashedString(char const *input) : _hash(ConstHash(input)){
}
HashedString(const HashedString& b) = default;
/// Compile-time hash of a NUL-terminated UTF-16 string.
/// The empty string hashes to 5381; otherwise
/// hash(s) = s[0] + 33 * hash(s + 1), returned as uint32_t.
/// Kept as a single return expression so it stays a valid C++11 constexpr.
static uint32_t constexpr ConstHash(char16_t const *input) {
    return input[0] == u'\0'
        ? 5381
        : static_cast<uint32_t>(input[0]) + 33 * ConstHash(&input[1]);
}
static uint32_t constexpr ConstHash(char const *input) {
return *input ?
static_cast<uint32_t>(*input) + 33 * ConstHash(input + 1) :