Initial commit, implements parser.

This commit is contained in:
Deukhoofd 2021-05-15 16:53:53 +02:00
commit f1af568cb8
Signed by: Deukhoofd
GPG Key ID: F63E044490819F6F
10 changed files with 1096 additions and 0 deletions

3
.gitignore vendored Normal file

@ -0,0 +1,3 @@
/target
Cargo.lock
.idea/

8
Cargo.toml Normal file

@ -0,0 +1,8 @@
[package]
name = "seraph_script"
version = "0.1.0"
authors = ["Deukhoofd <Deukhoofd@gmail.com>"]
edition = "2018"
[dependencies]
itertools = "0.10.0"

99
grammar.ebnf Normal file

@ -0,0 +1,99 @@
letter ::= 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G'
| 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N'
| 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U'
| 'V' | 'W' | 'X' | 'Y' | 'Z' | 'a' | 'b'
| 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i'
| 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p'
| 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w'
| 'x' | 'y' | 'z' ;
all_characters ::= ? any character ? ;
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;
hexadecimal_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' |
'8' | '9' | 'A' | 'a' | 'B' | 'b' | 'C' | 'c' |
'D' | 'd' | 'E' | 'e' | 'F' | 'f';
octal_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7';
binary_digit ::= '0' | '1';
character ::= letter | digit | '_' ;
identifier ::= (letter | '_') { character };
float ::= digit { digit } '.' digit {digit};
dec_integer ::= ['0D' | '0d'] digit {digit};
hex_integer ::= ('0X'|'0x') hexadecimal_digit {hexadecimal_digit};
octal_integer ::= ('0O'|'0o') octal_digit {octal_digit};
binary_integer ::= ('0B'|'0b') binary_digit {binary_digit};
integer ::= dec_integer | hex_integer | octal_integer | binary_integer;
number ::= integer | float;
string ::= ('\'' {all_characters} '\'' | '\"' {all_characters} '\"' | '\"\"\"' {all_characters} '\"\"\"');
assignop ::= '=' | '+=' | '-=' | '*=' | '/=' | '|=' | '&=' | '^=' | '%=' | '**=' | '<<=' | '>>=' | '>>>=';
logicop ::= '&&' | '||' | '^^' | 'and' | 'or' | 'xor';
compop ::= '==' | '!=' | '<' | '<=' | '>' | '>=' | 'is' | '!is';
mathop ::= '+' | '-' | '*' | '/' | '%' | '**';
bitop ::= '&' | '|' | '^' | '<<' | '>>' | '>>>';
primtype ::= 'void' | 'int' | 'int8' | 'int16' | 'int32' | 'int64' | 'uint' | 'uint8' | 'uint16' |
'uint32' | 'uint64' | 'float' | 'double' | 'bool';
datatype ::= (identifier | primtype | 'auto');
scope ::= ['::'] {identifier '::'} [identifier ['<' type {',' type} '>'] '::'];
type ::= ['const'] scope datatype ['<' type {',' type} '>'] { ('[' ']') | ('@' ['const']) };
# ternary is defined further below due to a circular dependency: ternary->expr->exprterm->initlist->assign
assign ::= ternary [ assignop assign ];
initlist ::= '{' [assign | initlist] {',' [assign | initlist]} '}';
exprpreop ::= '-' | '+' | '!' | '++' | '--' | '~' | '@';
arglist ::= '(' [identifier ':'] assign {',' [identifier ':'] assign} ')';
funccall ::= scope identifier arglist;
constructcall ::= type arglist;
varaccess ::= scope | identifier;
cast ::= 'cast' '<' type '>' '(' assign ')';
literal ::= number | string | 'true' | 'false' | 'null';
typemod ::= ['&' ['in' | 'out' | 'inout']];
# statblock is defined further below, as statements are higher level than expressions.
lambda ::= 'function' '(' [[type typemod] identifier {',' [type typemod] identifier}] ')' statblock;
exprvalue ::= 'void' | constructcall | funccall | varaccess | cast | literal | '(' assign ')' | lambda;
exprpostop ::= ('.' (funccall | identifier)) | ('[' [identifier ':'] assign {',' [identifier ':'] assign} ']') | arglist | '++' | '--';
exprterm ::= ([type '='] initlist) | ({exprpreop} exprvalue {exprpostop});
expr ::= exprterm {(mathop | compop | logicop | bitop) exprterm};
ternary ::= expr ['?' assign ':' assign];
return ::= 'return' [assign] ';';
exprstat ::= assign ';';
continue ::= 'continue' ';';
break ::= 'break' ';';
# As these are all statements using other statements, they use the statement and statblock types defined further below.
if ::= 'if' '(' assign ')' statement ['else' statement];
for ::= 'for' '(' (var | exprstat) exprstat [assign {',' assign}] ')' statement;
while ::= 'while' '(' assign ')' statement;
dowhile ::= 'do' statement 'while' '(' assign ')' ';';
try ::= 'try' statblock 'catch' statblock;
case ::= (('case' expr) | 'default') ':' {statement};
switch ::= 'switch' '(' assign ')' '{' {case} '}';
statement ::= (if | for | while | return | statblock | break | continue | dowhile | switch | exprstat | try );
var ::= ['private'|'protected'] type identifier [( '=' (initlist | expr)) | arglist] {',' identifier [( '=' (initlist | expr)) | arglist]} ';';
statblock ::= '{' {var | statement} '}';
funcattr ::= {'override' | 'final' | 'explicit' | 'property'};
paramlist ::= '(' ['void' | (type typemod [identifier] ['=' expr] {',' type typemod [identifier] ['=' expr]})] ')';
virtprop ::= ['private' | 'protected'] type ['&'] identifier '{' {('get' | 'set') ['const'] funcattr (statblock | ';')} '}';
func ::= {'shared' | 'external'} ['private' | 'protected'] [((type ['&']) | '~')] identifier paramlist ['const'] funcattr (';' | statblock);
funcdef ::= {'external' | 'shared'} 'funcdef' type ['&'] identifier paramlist ';';
class ::= {'shared' | 'abstract' | 'final' | 'external'} 'class' identifier
(';' | ([':' identifier {',' identifier}] '{' {virtprop | func | var | funcdef | class} '}'));
mixin ::= 'mixin' class;
enum ::= {'shared' | 'external'} 'enum' identifier [ ':' primtype ] (';' | ('{' identifier ['=' expr] {',' identifier ['=' expr]} '}'));
import ::= 'import' type ['&'] identifier paramlist funcattr 'from' string ';';
typedef ::= 'typedef' (primtype | identifier) identifier ';';
interfacemethod ::= type ['&'] identifier paramlist ['const'] ';';
interface ::= {'external' | 'shared'} 'interface' identifier (';' | ([':' identifier {',' identifier}] '{' {virtprop | interfacemethod} '}'));
namespace ::= 'namespace' identifier '{' script '}';
script ::= {import | enum | typedef | class | mixin | interface | funcdef | virtprop | var | func | namespace | ';'};
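# Informative example (not part of the grammar): the statement `int x = 5;` matches var as
# type identifier '=' expr ';', where type derives primtype 'int' (with an empty scope) and
# expr derives exprterm -> exprvalue -> literal -> number -> integer -> dec_integer -> '5'.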

4
src/defines.rs Normal file

@ -0,0 +1,4 @@
/// The integer type used internally to store literals and to perform compile-time calculations.
pub type LiteralInt = i64;
/// The floating-point type used internally to store literals and to perform compile-time calculations.
pub type LiteralFloat = f64;

5
src/lib.rs Normal file

@ -0,0 +1,5 @@
#![feature(concat_idents)]
#![feature(exclusive_range_pattern)]
pub(crate) mod defines;
pub mod parsing;

242
src/parsing/lex_numerical.rs Normal file

@ -0,0 +1,242 @@
use crate::defines::{LiteralFloat, LiteralInt};
use crate::parsing::lex_tokens::LexToken;
use itertools::MultiPeek;
use std::str::Chars;
#[inline(always)]
fn get_decimal_value(c: char) -> Option<LiteralInt> {
match c {
'0' => Some(0),
'1' => Some(1),
'2' => Some(2),
'3' => Some(3),
'4' => Some(4),
'5' => Some(5),
'6' => Some(6),
'7' => Some(7),
'8' => Some(8),
'9' => Some(9),
_ => None,
}
}
#[inline(always)]
fn lex_numeric_default(chars: &mut MultiPeek<Chars>) -> LexToken {
let mut int_value: LiteralInt = 0;
let mut decimal_value: LiteralInt = 0;
let mut exponent_value: LiteralInt = 0;
let mut decimal_length: LiteralInt = 0;
let mut is_decimal = false;
let mut is_exponent = false;
let mut is_reading = true;
let mut c: Option<char> = chars.peek().cloned();
while c.is_some() && is_reading {
let v = get_decimal_value(c.unwrap());
match v {
None => {
if c.unwrap() == '_' {
chars.next();
c = chars.peek().cloned();
continue;
}
if !is_decimal && c.unwrap() == '.' {
is_decimal = true;
chars.next();
c = chars.peek().cloned();
continue;
}
if is_decimal && (c.unwrap() == 'e' || c.unwrap() == 'E') {
is_decimal = false;
is_exponent = true;
chars.next();
c = chars.peek().cloned();
continue;
}
c = chars.peek().cloned();
is_reading = false;
continue;
}
Some(i) => {
chars.next();
if is_decimal {
decimal_value *= 10;
decimal_value += i;
decimal_length += 1;
} else if is_exponent {
exponent_value *= 10;
exponent_value += i;
} else {
int_value *= 10;
int_value += i;
}
}
}
c = chars.peek().cloned();
}
chars.reset_peek();
if is_decimal || is_exponent {
let mut val = int_value as LiteralFloat
+ (decimal_value as LiteralFloat / 10_i64.pow(decimal_length as u32) as LiteralFloat);
if is_exponent {
val *= (10.0 as LiteralFloat).powi(exponent_value as i32);
}
LexToken::FloatLiteral(val)
} else {
LexToken::IntegerLiteral(int_value)
}
}
#[inline(always)]
fn get_hexadecimal_value(c: char) -> Option<LiteralInt> {
match c {
'0' => Some(0),
'1' => Some(1),
'2' => Some(2),
'3' => Some(3),
'4' => Some(4),
'5' => Some(5),
'6' => Some(6),
'7' => Some(7),
'8' => Some(8),
'9' => Some(9),
'A' | 'a' => Some(10),
'B' | 'b' => Some(11),
'C' | 'c' => Some(12),
'D' | 'd' => Some(13),
'E' | 'e' => Some(14),
'F' | 'f' => Some(15),
_ => None,
}
}
#[inline(always)]
fn lex_numeric_hexadecimal(chars: &mut MultiPeek<Chars>) -> LexToken {
let mut int_value: LiteralInt = 0;
let mut reading = true;
let mut n = chars.peek().cloned();
while n.is_some() && reading {
match get_hexadecimal_value(n.unwrap()) {
Some(i) => {
int_value <<= 4;
int_value += i;
chars.next();
}
None => {
if n.unwrap() == '_' {
chars.next();
} else {
reading = false
}
}
}
n = chars.peek().cloned();
}
LexToken::IntegerLiteral(int_value)
}
#[inline(always)]
fn get_octal_value(c: char) -> Option<LiteralInt> {
match c {
'0' => Some(0),
'1' => Some(1),
'2' => Some(2),
'3' => Some(3),
'4' => Some(4),
'5' => Some(5),
'6' => Some(6),
'7' => Some(7),
_ => None,
}
}
#[inline(always)]
fn lex_numeric_octal(chars: &mut MultiPeek<Chars>) -> LexToken {
let mut int_value: LiteralInt = 0;
let mut reading = true;
let mut n = chars.peek().cloned();
while n.is_some() && reading {
match get_octal_value(n.unwrap()) {
Some(i) => {
int_value <<= 3;
int_value += i;
chars.next();
}
None => {
if n.unwrap() == '_' {
chars.next();
} else {
reading = false
}
}
}
n = chars.peek().cloned();
}
LexToken::IntegerLiteral(int_value)
}
#[inline(always)]
fn get_binary_value(c: char) -> Option<LiteralInt> {
match c {
'0' => Some(0),
'1' => Some(1),
_ => None,
}
}
#[inline(always)]
fn lex_numeric_binary(chars: &mut MultiPeek<Chars>) -> LexToken {
let mut int_value: LiteralInt = 0;
let mut reading = true;
let mut n = chars.peek().cloned();
while n.is_some() && reading {
match get_binary_value(n.unwrap()) {
Some(i) => {
int_value <<= 1;
int_value += i;
chars.next();
}
None => {
if n.unwrap() == '_' {
chars.next();
} else {
reading = false
}
}
}
n = chars.peek().cloned();
}
LexToken::IntegerLiteral(int_value)
}
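/// Lexes a numeric literal, dispatching on an optional 0d/0x/0o/0b prefix; without a prefix it
/// is read as a decimal integer or float (with optional fraction and exponent).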
#[inline(always)]
pub fn lex_numeric(chars: &mut MultiPeek<Chars>) -> LexToken {
chars.reset_peek();
if chars.peek() == Some(&'0') {
match chars.peek() {
Some(&'D') | Some(&'d') => {
chars.next();
chars.next();
return lex_numeric_default(chars);
}
Some(&'X') | Some(&'x') => {
chars.next();
chars.next();
return lex_numeric_hexadecimal(chars);
}
Some(&'O') | Some(&'o') => {
chars.next();
chars.next();
return lex_numeric_octal(chars);
}
Some(&'B') | Some(&'b') => {
chars.next();
chars.next();
return lex_numeric_binary(chars);
}
_ => {}
}
}
chars.reset_peek();
lex_numeric_default(chars)
}
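A minimal sketch of driving these routines directly (not part of the commit; it assumes the code sits inside the parsing module, since lex_numerical is private to it, and that itertools' multipeek() is used as above):

#[cfg(test)]
mod numeric_sketch {
    use super::lex_numerical::lex_numeric;
    use super::lex_tokens::LexToken;
    use itertools::Itertools;

    #[test]
    fn hex_literal_with_underscore() {
        // lex_numeric starts at the peek cursor, consumes the whole literal,
        // and folds "0xff_ff" into a single integer token.
        let mut chars = "0xff_ff".chars().multipeek();
        assert_eq!(lex_numeric(&mut chars), LexToken::IntegerLiteral(0xffff));
    }
}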

138
src/parsing/lex_tokens.rs Normal file

@ -0,0 +1,138 @@
use crate::defines::{LiteralFloat, LiteralInt};
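/// A single lexical token. Whitespace is emitted as an explicit token and every token stream is terminated by EndOfFile.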
#[derive(PartialEq, Debug)]
pub enum LexToken {
EndOfFile,
WhiteSpace,
Identifier(String),
IntegerLiteral(LiteralInt),
FloatLiteral(LiteralFloat),
StringLiteral(String),
Semicolon,
Colon,
OpenBracket,
CloseBracket,
OpenCurlyBracket,
CloseCurlyBracket,
OpenBlockBracket,
CloseBlockBracket,
// Keywords
AndKeyword,
AbstractKeyword,
AutoKeyword,
BoolKeyword,
BreakKeyword,
CaseKeyword,
CastKeyword,
CatchKeyword,
ClassKeyword,
ConstKeyword,
ContinueKeyword,
DefaultKeyword,
DoKeyword,
DoubleKeyword,
ElseKeyword,
EnumKeyword,
ExplicitKeyword,
ExternalKeyword,
FalseKeyword,
FinalKeyword,
FloatKeyword,
ForKeyword,
FromKeyword,
FuncDefKeyword,
FunctionKeyword,
GetKeyword,
IfKeyword,
ImportKeyword,
InKeyword,
InOutKeyword,
IntKeyword,
InterfaceKeyword,
Int8Keyword,
Int16Keyword,
Int32Keyword,
Int64Keyword,
IsKeyword,
MixinKeyword,
NamespaceKeyword,
NotKeyword,
NullKeyword,
OrKeyword,
OutKeyword,
OverrideKeyword,
PrivateKeyword,
PropertyKeyword,
ProtectedKeyword,
ReturnKeyword,
SetKeyword,
SharedKeyword,
SuperKeyword,
SwitchKeyword,
ThisKeyword,
TrueKeyword,
TryKeyword,
TypeDefKeyword,
UintKeyword,
Uint8Keyword,
Uint16Keyword,
Uint32Keyword,
Uint64Keyword,
VoidKeyword,
WhileKeyword,
XorKeyword,
// AssignOp
Equals,
PlusEquals,
MinusEquals,
StarEquals,
SlashEquals,
LineEquals,
AmpersandEquals,
RoofEquals,
PercentEquals,
StarStarEquals,
LeftLeftEquals,
RightRightEquals,
RightRightRightEquals,
// LogicOp
AmpersandAmpersand,
LineLine,
RoofRoof,
// CompOp
EqualsEquals,
NotEquals,
NotIsKeyword,
GreaterThan,
GreaterThanEquals,
LessThan,
LessThanEquals,
// MathOp
Plus,
Minus,
Star,
Slash,
Percent,
StarStar,
// BitOp
Ampersand,
VerticalLine,
Roof,
LeftLeft,
RightRight,
RightRightRight,
// ExprPreOp
ExclamationMark,
PlusPlus,
MinusMinus,
Tilde,
AtSymbol,
}

350
src/parsing/lexer.rs Normal file

@ -0,0 +1,350 @@
use super::lex_numerical::lex_numeric;
use crate::parsing::lex_tokens::LexToken;
use itertools::{Itertools, MultiPeek};
use std::str::Chars;
#[inline(always)]
fn lex_and_consume(chars: &mut MultiPeek<Chars>, eq: LexToken) -> LexToken {
chars.next();
eq
}
#[inline(always)]
fn lex_eq_or(chars: &mut MultiPeek<Chars>, eq: LexToken, or: LexToken) -> LexToken {
chars.next();
if let Some('=') = chars.peek() {
chars.next();
eq
} else {
or
}
}
#[inline(always)]
fn lex_eq_rep_or(
chars: &mut MultiPeek<Chars>,
v: char,
eq: LexToken,
rep: LexToken,
or: LexToken,
) -> LexToken {
chars.next();
return match chars.peek() {
Some(c) => {
if *c == v {
chars.next();
rep
} else if *c == '=' {
chars.next();
eq
} else {
or
}
}
None => or,
};
}
type LT = LexToken;
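/// Reads an identifier-like word and returns the matching keyword token, or Identifier if the word is not a keyword.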
fn lex_keyword_or_identifier(chars: &mut MultiPeek<Chars>) -> LexToken {
let mut reading = true;
let mut length = 1;
while reading {
match chars.peek() {
Some(c) => match c {
'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
length += 1;
}
_ => {
reading = false;
}
},
None => {
reading = false;
}
};
}
chars.reset_peek();
let c: String = chars.take(length).collect();
match c.as_str() {
"and" => LT::AndKeyword,
"abstract" => LT::AbstractKeyword,
"auto" => LT::AutoKeyword,
"bool" => LT::BoolKeyword,
"break" => LT::BreakKeyword,
"case" => LT::CaseKeyword,
"cast" => LT::CastKeyword,
"catch" => LT::CatchKeyword,
"class" => LT::ClassKeyword,
"const" => LT::ConstKeyword,
"continue" => LT::ContinueKeyword,
"default" => LT::DefaultKeyword,
"do" => LT::DoKeyword,
"double" => LT::DoubleKeyword,
"else" => LT::ElseKeyword,
"enum" => LT::EnumKeyword,
"explicit" => LT::ExplicitKeyword,
"external" => LT::ExternalKeyword,
"false" => LT::FalseKeyword,
"final" => LT::FinalKeyword,
"float" => LT::FloatKeyword,
"for" => LT::ForKeyword,
"from" => LT::FromKeyword,
"funcdef" => LT::FuncDefKeyword,
"function" => LT::FunctionKeyword,
"get" => LT::GetKeyword,
"if" => LT::IfKeyword,
"import" => LT::ImportKeyword,
"in" => LT::InKeyword,
"inout" => LT::InOutKeyword,
"int" => LT::IntKeyword,
"interface" => LT::InterfaceKeyword,
"int8" => LT::Int8Keyword,
"int16" => LT::Int16Keyword,
"int32" => LT::Int32Keyword,
"int64" => LT::Int64Keyword,
"is" => LT::IsKeyword,
"mixin" => LT::MixinKeyword,
"namespace" => LT::NamespaceKeyword,
"not" => LT::NotKeyword,
"null" => LT::NullKeyword,
"or" => LT::OrKeyword,
"out" => LT::OutKeyword,
"override" => LT::OverrideKeyword,
"private" => LT::PrivateKeyword,
"property" => LT::PropertyKeyword,
"protected" => LT::ProtectedKeyword,
"return" => LT::ReturnKeyword,
"set" => LT::SetKeyword,
"shared" => LT::SharedKeyword,
"super" => LT::SuperKeyword,
"switch" => LT::SwitchKeyword,
"this" => LT::ThisKeyword,
"true" => LT::TrueKeyword,
"try" => LT::TryKeyword,
"typedef" => LT::TypeDefKeyword,
"uint" => LT::UintKeyword,
"uint8" => LT::Uint8Keyword,
"uint16" => LT::Uint16Keyword,
"uint32" => LT::Uint32Keyword,
"uint64" => LT::Uint64Keyword,
"void" => LT::VoidKeyword,
"while" => LT::WhileKeyword,
"xor" => LT::XorKeyword,
_ => LT::Identifier(c),
}
}
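/// Lexes a string literal. `opening_char` is the quote that opened it; `heredoc` marks a
/// triple-quoted """...""" string, in which unescaped quote characters are allowed.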
fn lex_string(chars: &mut MultiPeek<Chars>, opening_char: &char, heredoc: bool) -> LexToken {
chars.next();
if heredoc {
chars.next();
chars.next();
}
let mut length: i32 = 0;
let mut string_length = 0;
let mut last_was_control = false;
// We loop twice here. The first pass determines how many characters to read, how long the
// resulting string will be, and whether the literal is valid. This reduces the number of
// allocations needed to read a string.
loop {
let p = chars.peek();
match p {
None => {
// TODO: log error. Strings need to be closed, EOF should error.
unimplemented!();
}
Some(&'\\') if !last_was_control => {
last_was_control = true;
length += 1;
}
Some(c) => {
if c == opening_char && !last_was_control {
if heredoc {
if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') {
break;
} else {
length += 1;
string_length += 1;
}
} else {
break;
}
}
length += 1;
string_length += 1;
last_was_control = false;
}
}
}
chars.reset_peek();
let mut s: String = String::with_capacity(string_length);
for _ in 0..length {
let p = chars.next().unwrap();
match p {
'\\' => {
if last_was_control {
s.push('\\');
} else {
last_was_control = true;
continue;
}
}
'0' if last_was_control => s.push('\0'),
'n' if last_was_control => s.push('\n'),
'r' if last_was_control => s.push('\r'),
't' if last_was_control => s.push('\t'),
_ => s.push(p),
};
last_was_control = false;
}
assert_eq!(s.len(), string_length);
chars.reset_peek();
chars.next();
if heredoc {
chars.next();
chars.next();
}
LT::StringLiteral(s)
}
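/// Lexes the given source text into a flat list of tokens. Whitespace is kept as explicit
/// tokens and the result always ends with EndOfFile.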
pub fn lex(s: &str) -> Vec<LT> {
let mut tokens: Vec<LT> = Vec::new();
let mut chars = s.chars().multipeek();
let mut reading = true;
while reading {
let p = chars.peek().cloned();
match p {
Some(c) => match c {
' ' | '\t' | '\r' | '\n' => {
chars.next();
tokens.push(LT::WhiteSpace);
}
'=' => tokens.push(lex_eq_or(&mut chars, LT::EqualsEquals, LT::Equals)),
'+' => tokens.push(lex_eq_rep_or(
&mut chars,
'+',
LT::PlusEquals,
LT::PlusPlus,
LT::Plus,
)),
'-' => tokens.push(lex_eq_rep_or(
&mut chars,
'-',
LT::MinusEquals,
LT::MinusMinus,
LT::Minus,
)),
'*' => {
if chars.peek() == Some(&'*') {
chars.next();
tokens.push(lex_eq_or(&mut chars, LT::StarStarEquals, LT::StarStar))
} else {
tokens.push(lex_eq_or(&mut chars, LT::StarEquals, LT::Star))
}
}
'/' => tokens.push(lex_eq_or(&mut chars, LT::SlashEquals, LT::Slash)),
'%' => tokens.push(lex_eq_or(&mut chars, LT::PercentEquals, LT::Percent)),
'|' => tokens.push(lex_eq_rep_or(
&mut chars,
'|',
LT::LineEquals,
LT::LineLine,
LT::VerticalLine,
)),
'&' => tokens.push(lex_eq_rep_or(
&mut chars,
'&',
LT::AmpersandEquals,
LT::AmpersandAmpersand,
LT::Ampersand,
)),
'^' => tokens.push(lex_eq_rep_or(
&mut chars,
'^',
LT::RoofEquals,
LT::RoofRoof,
LT::Roof,
)),
'<' => {
if chars.peek() == Some(&'<') {
chars.next();
tokens.push(lex_eq_or(&mut chars, LT::LeftLeftEquals, LT::LeftLeft))
} else {
tokens.push(lex_eq_or(&mut chars, LT::LessThanEquals, LT::LessThan))
}
}
'>' => {
if chars.peek() == Some(&'>') {
if chars.peek() == Some(&'>') {
chars.next();
chars.next();
tokens.push(lex_eq_or(
&mut chars,
LT::RightRightRightEquals,
LT::RightRightRight,
))
} else {
chars.next();
tokens.push(lex_eq_or(&mut chars, LT::RightRightEquals, LT::RightRight))
}
} else {
tokens.push(lex_eq_or(
&mut chars,
LT::GreaterThanEquals,
LT::GreaterThan,
))
}
}
'!' => {
let next = chars.peek();
if next == Some(&'=') {
chars.next();
chars.next();
tokens.push(LT::NotEquals);
} else if next == Some(&'i') && chars.peek() == Some(&'s') {
chars.next();
chars.next();
chars.next();
tokens.push(LT::NotIsKeyword);
} else {
chars.next();
tokens.push(LT::ExclamationMark);
}
}
'~' => tokens.push(lex_and_consume(&mut chars, LT::Tilde)),
'@' => tokens.push(lex_and_consume(&mut chars, LT::AtSymbol)),
';' => tokens.push(lex_and_consume(&mut chars, LT::Semicolon)),
':' => tokens.push(lex_and_consume(&mut chars, LT::Colon)),
'(' => tokens.push(lex_and_consume(&mut chars, LT::OpenBracket)),
')' => tokens.push(lex_and_consume(&mut chars, LT::CloseBracket)),
'{' => tokens.push(lex_and_consume(&mut chars, LT::OpenCurlyBracket)),
'}' => tokens.push(lex_and_consume(&mut chars, LT::CloseCurlyBracket)),
'[' => tokens.push(lex_and_consume(&mut chars, LT::OpenBlockBracket)),
']' => tokens.push(lex_and_consume(&mut chars, LT::CloseBlockBracket)),
'0'..='9' => tokens.push(lex_numeric(&mut chars)),
'a'..='z' | 'A'..='Z' | '_' => tokens.push(lex_keyword_or_identifier(&mut chars)),
'\'' => tokens.push(lex_string(&mut chars, &'\'', false)),
'"' if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') => {
tokens.push(lex_string(&mut chars, &'"', true))
}
'"' => tokens.push(lex_string(&mut chars, &'"', false)),
// TODO: Definitely not unreachable. Log a proper error here.
_ => unreachable!(),
},
None => {
tokens.push(LT::EndOfFile);
reading = false;
}
}
}
tokens
}
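For orientation, a minimal usage sketch of the public entry point (not part of the commit; it assumes the crate is consumed as seraph_script per Cargo.toml and built on a nightly toolchain because of the feature gates in lib.rs):

use seraph_script::parsing::{lex_tokens::LexToken, lexer::lex};

fn main() {
    // Whitespace is preserved as explicit tokens and the stream ends with EndOfFile:
    // [IntKeyword, WhiteSpace, Identifier("foo"), WhiteSpace, Equals,
    //  WhiteSpace, IntegerLiteral(255), Semicolon, EndOfFile]
    let tokens = lex("int foo = 0xff;");
    assert_eq!(tokens[6], LexToken::IntegerLiteral(255));
    assert_eq!(tokens.last(), Some(&LexToken::EndOfFile));
}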

241
src/parsing/lexer_tests.rs Normal file

@ -0,0 +1,241 @@
use super::lex_tokens::LexToken;
use super::lexer::lex;
macro_rules! lex_token_test {
( $a: ident, $b: expr, $c: expr) => {
#[test]
fn $a() {
let tokens = lex($b);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], $c);
assert_eq!(tokens[1], LexToken::EndOfFile);
}
};
}
macro_rules! lex_identifier_test {
( $a: ident, $b: expr) => {
#[test]
fn $a() {
let tokens = lex($b);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], LexToken::Identifier($b.to_string()));
assert_eq!(tokens[1], LexToken::EndOfFile);
}
};
}
macro_rules! lex_integer_test {
( $a: ident, $b: expr, $c: expr) => {
#[test]
fn $a() {
let tokens = lex($b);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], LexToken::IntegerLiteral($c));
assert_eq!(tokens[1], LexToken::EndOfFile);
}
};
}
macro_rules! lex_float_test {
( $a: ident, $b: expr, $c: expr) => {
#[test]
fn $a() {
let tokens = lex($b);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], LexToken::FloatLiteral($c));
assert_eq!(tokens[1], LexToken::EndOfFile);
}
};
}
macro_rules! lex_string_test {
( $a: ident, $b: expr, $c: expr) => {
#[test]
fn $a() {
let tokens = lex($b);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], LexToken::StringLiteral($c.to_string()));
assert_eq!(tokens[1], LexToken::EndOfFile);
}
};
}
lex_token_test!(lex_space, " ", LexToken::WhiteSpace);
lex_token_test!(lex_tab, "\t", LexToken::WhiteSpace);
lex_token_test!(lex_return_line, "\r", LexToken::WhiteSpace);
lex_token_test!(lex_newline, "\n", LexToken::WhiteSpace);
lex_token_test!(lex_equals, "=", LexToken::Equals);
lex_token_test!(lex_equals_equals, "==", LexToken::EqualsEquals);
lex_token_test!(lex_plus, "+", LexToken::Plus);
lex_token_test!(lex_plus_plus, "++", LexToken::PlusPlus);
lex_token_test!(lex_plus_equals, "+=", LexToken::PlusEquals);
lex_token_test!(lex_minus, "-", LexToken::Minus);
lex_token_test!(lex_minus_minus, "--", LexToken::MinusMinus);
lex_token_test!(lex_minus_equals, "-=", LexToken::MinusEquals);
lex_token_test!(lex_star, "*", LexToken::Star);
lex_token_test!(lex_star_equals, "*=", LexToken::StarEquals);
lex_token_test!(lex_star_star, "**", LexToken::StarStar);
lex_token_test!(lex_star_star_equals, "**=", LexToken::StarStarEquals);
lex_token_test!(lex_slash, "/", LexToken::Slash);
lex_token_test!(lex_slash_equals, "/=", LexToken::SlashEquals);
lex_token_test!(lex_percent, "%", LexToken::Percent);
lex_token_test!(lex_percent_equals, "%=", LexToken::PercentEquals);
lex_token_test!(lex_exclamation_mark, "!", LexToken::ExclamationMark);
lex_token_test!(lex_not_equals, "!=", LexToken::NotEquals);
lex_token_test!(lex_not_is_keyword, "!is", LexToken::NotIsKeyword);
lex_token_test!(lex_vert_line, "|", LexToken::VerticalLine);
lex_token_test!(lex_vert_line_equals, "|=", LexToken::LineEquals);
lex_token_test!(lex_line_line, "||", LexToken::LineLine);
lex_token_test!(lex_ampersand, "&", LexToken::Ampersand);
lex_token_test!(lex_ampersand_equals, "&=", LexToken::AmpersandEquals);
lex_token_test!(lex_ampersand_ampersand, "&&", LexToken::AmpersandAmpersand);
lex_token_test!(lex_less_than, "<", LexToken::LessThan);
lex_token_test!(lex_less_than_equals, "<=", LexToken::LessThanEquals);
lex_token_test!(lex_left_left, "<<", LexToken::LeftLeft);
lex_token_test!(lex_left_left_equals, "<<=", LexToken::LeftLeftEquals);
lex_token_test!(lex_greater_than, ">", LexToken::GreaterThan);
lex_token_test!(lex_greater_than_equals, ">=", LexToken::GreaterThanEquals);
lex_token_test!(lex_right_right, ">>", LexToken::RightRight);
lex_token_test!(lex_right_right_equals, ">>=", LexToken::RightRightEquals);
lex_token_test!(lex_right_right_right, ">>>", LexToken::RightRightRight);
lex_token_test!(
lex_right_right_right_equals,
">>>=",
LexToken::RightRightRightEquals
);
lex_token_test!(lex_tilde, "~", LexToken::Tilde);
lex_token_test!(lex_at_symbol, "@", LexToken::AtSymbol);
lex_token_test!(lex_and_keyword, "and", LexToken::AndKeyword);
lex_token_test!(lex_abstract_keyword, "abstract", LexToken::AbstractKeyword);
lex_token_test!(lex_auto_keyword, "auto", LexToken::AutoKeyword);
lex_token_test!(lex_bool_keyword, "bool", LexToken::BoolKeyword);
lex_token_test!(lex_break_keyword, "break", LexToken::BreakKeyword);
lex_token_test!(lex_case_keyword, "case", LexToken::CaseKeyword);
lex_token_test!(lex_cast_keyword, "cast", LexToken::CastKeyword);
lex_token_test!(lex_catch_keyword, "catch", LexToken::CatchKeyword);
lex_token_test!(lex_class_keyword, "class", LexToken::ClassKeyword);
lex_token_test!(lex_const_keyword, "const", LexToken::ConstKeyword);
lex_token_test!(lex_continue_keyword, "continue", LexToken::ContinueKeyword);
lex_token_test!(lex_default_keyword, "default", LexToken::DefaultKeyword);
lex_token_test!(lex_do_keyword, "do", LexToken::DoKeyword);
lex_token_test!(lex_double_keyword, "double", LexToken::DoubleKeyword);
lex_token_test!(lex_else_keyword, "else", LexToken::ElseKeyword);
lex_token_test!(lex_enum_keyword, "enum", LexToken::EnumKeyword);
lex_token_test!(lex_explicit_keyword, "explicit", LexToken::ExplicitKeyword);
lex_token_test!(lex_external_keyword, "external", LexToken::ExternalKeyword);
lex_token_test!(lex_false_keyword, "false", LexToken::FalseKeyword);
lex_token_test!(lex_final_keyword, "final", LexToken::FinalKeyword);
lex_token_test!(lex_float_keyword, "float", LexToken::FloatKeyword);
lex_token_test!(lex_for_keyword, "for", LexToken::ForKeyword);
lex_token_test!(lex_from_keyword, "from", LexToken::FromKeyword);
lex_token_test!(lex_funcdef_keyword, "funcdef", LexToken::FuncDefKeyword);
lex_token_test!(lex_function_keyword, "function", LexToken::FunctionKeyword);
lex_token_test!(lex_get_keyword, "get", LexToken::GetKeyword);
lex_token_test!(lex_if_keyword, "if", LexToken::IfKeyword);
lex_token_test!(lex_import_keyword, "import", LexToken::ImportKeyword);
lex_token_test!(lex_in_keyword, "in", LexToken::InKeyword);
lex_token_test!(lex_inout_keyword, "inout", LexToken::InOutKeyword);
lex_token_test!(lex_int_keyword, "int", LexToken::IntKeyword);
lex_token_test!(
lex_interface_keyword,
"interface",
LexToken::InterfaceKeyword
);
lex_token_test!(lex_int8_keyword, "int8", LexToken::Int8Keyword);
lex_token_test!(lex_int16_keyword, "int16", LexToken::Int16Keyword);
lex_token_test!(lex_int32_keyword, "int32", LexToken::Int32Keyword);
lex_token_test!(lex_int64_keyword, "int64", LexToken::Int64Keyword);
lex_token_test!(lex_is_keyword, "is", LexToken::IsKeyword);
lex_token_test!(lex_mixin_keyword, "mixin", LexToken::MixinKeyword);
lex_token_test!(
lex_namespace_keyword,
"namespace",
LexToken::NamespaceKeyword
);
lex_token_test!(lex_not_keyword, "not", LexToken::NotKeyword);
lex_token_test!(lex_null_keyword, "null", LexToken::NullKeyword);
lex_token_test!(lex_or_keyword, "or", LexToken::OrKeyword);
lex_token_test!(lex_out_keyword, "out", LexToken::OutKeyword);
lex_token_test!(lex_override_keyword, "override", LexToken::OverrideKeyword);
lex_token_test!(lex_private_keyword, "private", LexToken::PrivateKeyword);
lex_token_test!(lex_property_keyword, "property", LexToken::PropertyKeyword);
lex_token_test!(
lex_protected_keyword,
"protected",
LexToken::ProtectedKeyword
);
lex_token_test!(lex_return_keyword, "return", LexToken::ReturnKeyword);
lex_token_test!(lex_set_keyword, "set", LexToken::SetKeyword);
lex_token_test!(lex_shared_keyword, "shared", LexToken::SharedKeyword);
lex_token_test!(lex_super_keyword, "super", LexToken::SuperKeyword);
lex_token_test!(lex_switch_keyword, "switch", LexToken::SwitchKeyword);
lex_token_test!(lex_this_keyword, "this", LexToken::ThisKeyword);
lex_token_test!(lex_true_keyword, "true", LexToken::TrueKeyword);
lex_token_test!(lex_try_keyword, "try", LexToken::TryKeyword);
lex_token_test!(lex_typedef_keyword, "typedef", LexToken::TypeDefKeyword);
lex_token_test!(lex_uint_keyword, "uint", LexToken::UintKeyword);
lex_token_test!(lex_uint8_keyword, "uint8", LexToken::Uint8Keyword);
lex_token_test!(lex_uint16_keyword, "uint16", LexToken::Uint16Keyword);
lex_token_test!(lex_uint32_keyword, "uint32", LexToken::Uint32Keyword);
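lex_token_test!(lex_uint64_keyword, "uint64", LexToken::Uint64Keyword);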
lex_token_test!(lex_void_keyword, "void", LexToken::VoidKeyword);
lex_token_test!(lex_while_keyword, "while", LexToken::WhileKeyword);
lex_token_test!(lex_xor_keyword, "xor", LexToken::XorKeyword);
lex_identifier_test!(lex_basic_identifier_foo, "foo");
lex_identifier_test!(lex_basic_identifier_foobar, "foobar");
lex_integer_test!(lex_zero, "0", 0);
lex_integer_test!(lex_one_two_three_four, "1234", 1234);
lex_integer_test!(lex_specific_one_two_three_four, "0d1234", 1234);
lex_integer_test!(lex_decimal_with_underline, "123_456", 123456);
lex_integer_test!(lex_specific_decimal_with_underline, "0D123_456", 123456);
lex_integer_test!(lex_hexadecimal_0f, "0X0F", 15);
lex_integer_test!(lex_hexadecimal_ff, "0xff", 255);
lex_integer_test!(lex_hexadecimal_ff_ff, "0xff_ff", 65535);
lex_integer_test!(lex_octal_112, "0o112", 74);
lex_integer_test!(lex_binary_1110, "0b1110", 14);
lex_integer_test!(lex_binary_01110, "0b01110", 14);
lex_float_test!(lex_zero_float, "0.0", 0.0);
lex_float_test!(lex_half, "0.5", 0.5);
lex_float_test!(lex_point_0_5, "0.05", 0.05);
lex_float_test!(lex_half_with_exponent, "0.5e10", 0.5e10);
lex_string_test!(lex_simple_string, "\"foo\"", "foo");
lex_string_test!(lex_simple_string_single_quote, "\'foo\'", "foo");
lex_string_test!(lex_string_with_escape, "\"fo\\\"o\"", "fo\"o");
lex_string_test!(lex_string_with_new_line, "\"fo\\no\"", "fo\no");
lex_string_test!(lex_heredoc_string, "\"\"\"foo\"\"\"", "foo");
lex_string_test!(lex_heredoc_string_with_quote, "\"\"\"fo\"o\"\"\"", "fo\"o");
#[test]
fn lex_two_identifier() {
let tokens = lex("foo bar");
assert_eq!(tokens.len(), 4);
assert_eq!(tokens[0], LexToken::Identifier("foo".to_string()));
assert_eq!(tokens[1], LexToken::WhiteSpace);
assert_eq!(tokens[2], LexToken::Identifier("bar".to_string()));
assert_eq!(tokens[3], LexToken::EndOfFile);
}
#[test]
fn lex_multiple_tokens_with_not_is() {
let tokens = lex("a !is b");
assert_eq!(tokens.len(), 6);
assert_eq!(tokens[0], LexToken::Identifier("a".to_string()));
assert_eq!(tokens[1], LexToken::WhiteSpace);
assert_eq!(tokens[2], LexToken::NotIsKeyword);
assert_eq!(tokens[3], LexToken::WhiteSpace);
assert_eq!(tokens[4], LexToken::Identifier("b".to_string()));
assert_eq!(tokens[5], LexToken::EndOfFile);
}

6
src/parsing/mod.rs Normal file

@ -0,0 +1,6 @@
mod lex_numerical;
pub mod lex_tokens;
pub mod lexer;
#[cfg(test)]
mod lexer_tests;