Implement logging, pass spans to lexed tokens, begin work on parser

This commit is contained in:
Deukhoofd 2021-06-05 20:10:40 +02:00
parent f1af568cb8
commit 301ffd7496
Signed by: Deukhoofd
GPG Key ID: F63E044490819F6F
15 changed files with 1029 additions and 619 deletions

View File

@ -1,5 +1,8 @@
#![feature(concat_idents)] #![feature(concat_idents)]
#![feature(exclusive_range_pattern)] #![feature(exclusive_range_pattern)]
#![feature(assert_matches)]
pub(crate) mod defines; pub(crate) mod defines;
pub mod logger;
pub mod parsing; pub mod parsing;
pub mod span;

6
src/logger/messages.rs Normal file
View File

@ -0,0 +1,6 @@
/// Diagnostic messages that the lexer/parser can report to the logger.
#[derive(Debug)]
pub enum Message {
    /// A character no lexer rule accepts.
    UnexpectedCharacter(char),
    /// A specific character was expected but another one was found.
    InvalidCharacter { found: char, expected: char },
    /// End of input was reached before a string literal was closed.
    UnclosedStringLiteral,
}

23
src/logger/mod.rs Normal file
View File

@ -0,0 +1,23 @@
pub mod messages;
use crate::span::Span;
use messages::Message;
/// A single recorded diagnostic: what happened, in which file, and where.
pub struct Log {
    /// The diagnostic that was raised.
    pub message: Message,
    /// Name of the source file the diagnostic refers to.
    pub filename: String,
    /// Position range within the source text.
    pub span: Span,
}
/// Collects diagnostics produced during lexing/parsing.
pub struct Logger {
    /// All diagnostics recorded so far, in emission order.
    pub logs: Vec<Log>,
}
impl Logger {
pub fn log(&mut self, message: Message, filename: String, start: usize, end: usize) {
self.logs.push(Log {
message,
filename,
span: Span { start, end },
})
}
}

View File

@ -1,350 +0,0 @@
use super::lex_numerical::lex_numeric;
use crate::parsing::lex_tokens::LexToken;
use itertools::{Itertools, MultiPeek};
use std::str::Chars;
/// Consumes exactly one character and returns the given token unchanged.
/// Used for single-character tokens such as `~` or `;`.
#[inline(always)]
fn lex_and_consume(chars: &mut MultiPeek<Chars>, eq: LexToken) -> LexToken {
    chars.next();
    eq
}
/// Lexes a one-character operator that may be followed by `=`: returns `eq`
/// for the compound form (e.g. `+=`) and `or` for the bare operator.
#[inline(always)]
fn lex_eq_or(chars: &mut MultiPeek<Chars>, eq: LexToken, or: LexToken) -> LexToken {
    // Consume the character that selected this lexer.
    chars.next();
    match chars.peek() {
        Some('=') => {
            // Compound form: consume the '=' as well.
            chars.next();
            eq
        }
        _ => or,
    }
}
/// Lexes a one-character operator that may be followed by `=` (yielding `eq`),
/// by a repetition of itself `v` (yielding `rep`), or by anything else /
/// end-of-input (yielding `or`). E.g. for `+`: `+=` / `++` / `+`.
///
/// Idiom fix: replaced the trailing `return <match> ;` statement with a plain
/// tail expression and flattened the nested if/else into match arms.
#[inline(always)]
fn lex_eq_rep_or(
    chars: &mut MultiPeek<Chars>,
    v: char,
    eq: LexToken,
    rep: LexToken,
    or: LexToken,
) -> LexToken {
    // Consume the character that selected this lexer.
    chars.next();
    match chars.peek() {
        // Doubled character (e.g. `++`) is checked before `=`.
        Some(&c) if c == v => {
            chars.next();
            rep
        }
        Some(&'=') => {
            chars.next();
            eq
        }
        _ => or,
    }
}
type LT = LexToken;
/// Reads an identifier starting at the current character and returns either
/// the matching keyword token or `Identifier`.
///
/// The first character was already validated by the caller, so peeking starts
/// at the second character and `length` starts at 1.
///
/// Bug fix: the range patterns were exclusive (`'a'..'z' | 'A'..'Z' | '0'..'9'`),
/// which wrongly rejected 'z', 'Z' and '9' and truncated identifiers containing
/// them. They are now inclusive (`..=`).
fn lex_keyword_or_identifier(chars: &mut MultiPeek<Chars>) -> LexToken {
    let mut reading = true;
    let mut length = 1;
    while reading {
        match chars.peek() {
            Some(c) => match c {
                'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
                    length += 1;
                }
                _ => {
                    reading = false;
                }
            },
            None => {
                reading = false;
            }
        };
    }
    // Re-read the counted characters into a string for the keyword lookup.
    chars.reset_peek();
    let c: String = chars.take(length).collect();
    match c.as_str() {
        "and" => LT::AndKeyword,
        "abstract" => LT::AbstractKeyword,
        "auto" => LT::AutoKeyword,
        "bool" => LT::BoolKeyword,
        "break" => LT::BreakKeyword,
        "case" => LT::CaseKeyword,
        "cast" => LT::CastKeyword,
        "catch" => LT::CatchKeyword,
        "class" => LT::ClassKeyword,
        "const" => LT::ConstKeyword,
        "continue" => LT::ContinueKeyword,
        "default" => LT::DefaultKeyword,
        "do" => LT::DoKeyword,
        "double" => LT::DoubleKeyword,
        "else" => LT::ElseKeyword,
        "enum" => LT::EnumKeyword,
        "explicit" => LT::ExplicitKeyword,
        "external" => LT::ExternalKeyword,
        "false" => LT::FalseKeyword,
        "final" => LT::FinalKeyword,
        "float" => LT::FloatKeyword,
        "for" => LT::ForKeyword,
        "from" => LT::FromKeyword,
        "funcdef" => LT::FuncDefKeyword,
        "function" => LT::FunctionKeyword,
        "get" => LT::GetKeyword,
        "if" => LT::IfKeyword,
        "import" => LT::ImportKeyword,
        "in" => LT::InKeyword,
        "inout" => LT::InOutKeyword,
        "int" => LT::IntKeyword,
        "interface" => LT::InterfaceKeyword,
        "int8" => LT::Int8Keyword,
        "int16" => LT::Int16Keyword,
        "int32" => LT::Int32Keyword,
        "int64" => LT::Int64Keyword,
        "is" => LT::IsKeyword,
        "mixin" => LT::MixinKeyword,
        "namespace" => LT::NamespaceKeyword,
        "not" => LT::NotKeyword,
        "null" => LT::NullKeyword,
        "or" => LT::OrKeyword,
        "out" => LT::OutKeyword,
        "override" => LT::OverrideKeyword,
        "private" => LT::PrivateKeyword,
        "property" => LT::PropertyKeyword,
        "protected" => LT::ProtectedKeyword,
        "return" => LT::ReturnKeyword,
        "set" => LT::SetKeyword,
        "shared" => LT::SharedKeyword,
        "super" => LT::SuperKeyword,
        "switch" => LT::SwitchKeyword,
        "this" => LT::ThisKeyword,
        "true" => LT::TrueKeyword,
        "try" => LT::TryKeyword,
        "typedef" => LT::TypeDefKeyword,
        "uint" => LT::UintKeyword,
        "uint8" => LT::Uint8Keyword,
        "uint16" => LT::Uint16Keyword,
        "uint32" => LT::Uint32Keyword,
        "uint64" => LT::Uint64Keyword,
        "void" => LT::VoidKeyword,
        "while" => LT::WhileKeyword,
        "xor" => LT::XorKeyword,
        _ => LT::Identifier(c),
    }
}
/// Lexes a string literal. `opening_char` is the quote that opened it; when
/// `heredoc` is true the literal is triple-quoted (`"""…"""`).
///
/// NOTE(review): when a lone `opening_char` appears inside a heredoc, both the
/// inner branch and the unconditional fall-through below it bump
/// `length`/`string_length`; this appears to offset the extra `peek()` calls
/// made by the closing-quote check — confirm before restructuring the loop.
fn lex_string(chars: &mut MultiPeek<Chars>, opening_char: &char, heredoc: bool) -> LexToken {
    // Consume the opening quote (three quotes for a heredoc).
    chars.next();
    if heredoc {
        chars.next();
        chars.next();
    }
    let mut length: i32 = 0;
    let mut string_length = 0;
    let mut last_was_control = false;
    // We loop twice here. In the first loop we get the number of characters to read, the number of
    // characters the string should be, and whether it's valid. This reduces the amount of allocations
    // we need to do to read a string.
    loop {
        let p = chars.peek();
        match p {
            None => {
                // TODO: log error. Strings need to be closed, EOF should error.
                unimplemented!();
            }
            // A backslash starts an escape sequence; it consumes one source
            // character but contributes none to the string length.
            Some(&'\\') if !last_was_control => {
                last_was_control = true;
                length += 1;
            }
            Some(c) => {
                if c == opening_char && !last_was_control {
                    if heredoc {
                        // Heredocs only close on a full `"""`; peek two further.
                        if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') {
                            break;
                        } else {
                            length += 1;
                            string_length += 1;
                        }
                    } else {
                        break;
                    }
                }
                length += 1;
                string_length += 1;
                last_was_control = false;
            }
        }
    }
    // Second pass: re-read `length` source characters, resolving escapes.
    chars.reset_peek();
    let mut s: String = String::with_capacity(string_length);
    for _ in 0..length {
        let p = chars.next().unwrap();
        match p {
            '\\' => {
                if last_was_control {
                    s.push('\\');
                } else {
                    last_was_control = true;
                    continue;
                }
            }
            '0' if last_was_control => s.push('\0'),
            'n' if last_was_control => s.push('\n'),
            'r' if last_was_control => s.push('\r'),
            't' if last_was_control => s.push('\t'),
            _ => s.push(p),
        };
        last_was_control = false;
    }
    // The first pass promised exactly this many characters.
    assert_eq!(s.len(), string_length);
    // Consume the closing quote (three quotes for a heredoc).
    chars.reset_peek();
    chars.next();
    if heredoc {
        chars.next();
        chars.next();
    }
    LT::StringLiteral(s)
}
/// Lexes the entire source string `s` into a flat token list.
///
/// The list always ends with `EndOfFile`; whitespace is emitted as tokens
/// rather than skipped.
///
/// Bug fix: the dispatch patterns used exclusive ranges (`'0'..'9'`,
/// `'a'..'z' | 'A'..'Z'`), so the inputs `9`, `z` and `Z` fell through to the
/// `unreachable!()` arm and panicked. The ranges are now inclusive.
pub fn lex(s: &str) -> Vec<LT> {
    let mut tokens: Vec<LT> = Vec::new();
    let mut chars = s.chars().multipeek();
    let mut reading = true;
    while reading {
        let p = chars.peek().cloned();
        match p {
            Some(c) => match c {
                ' ' | '\t' | '\r' | '\n' => {
                    chars.next();
                    tokens.push(LT::WhiteSpace);
                }
                '=' => tokens.push(lex_eq_or(&mut chars, LT::EqualsEquals, LT::Equals)),
                '+' => tokens.push(lex_eq_rep_or(
                    &mut chars,
                    '+',
                    LT::PlusEquals,
                    LT::PlusPlus,
                    LT::Plus,
                )),
                '-' => tokens.push(lex_eq_rep_or(
                    &mut chars,
                    '-',
                    LT::MinusEquals,
                    LT::MinusMinus,
                    LT::Minus,
                )),
                '*' => {
                    if chars.peek() == Some(&'*') {
                        chars.next();
                        tokens.push(lex_eq_or(&mut chars, LT::StarStarEquals, LT::StarStar))
                    } else {
                        tokens.push(lex_eq_or(&mut chars, LT::StarEquals, LT::Star))
                    }
                }
                '/' => tokens.push(lex_eq_or(&mut chars, LT::SlashEquals, LT::Slash)),
                '%' => tokens.push(lex_eq_or(&mut chars, LT::PercentEquals, LT::Percent)),
                '|' => tokens.push(lex_eq_rep_or(
                    &mut chars,
                    '|',
                    LT::LineEquals,
                    LT::LineLine,
                    LT::VerticalLine,
                )),
                '&' => tokens.push(lex_eq_rep_or(
                    &mut chars,
                    '&',
                    LT::AmpersandEquals,
                    LT::AmpersandAmpersand,
                    LT::Ampersand,
                )),
                '^' => tokens.push(lex_eq_rep_or(
                    &mut chars,
                    '^',
                    LT::RoofEquals,
                    LT::RoofRoof,
                    LT::Roof,
                )),
                '<' => {
                    if chars.peek() == Some(&'<') {
                        chars.next();
                        tokens.push(lex_eq_or(&mut chars, LT::LeftLeftEquals, LT::LeftLeft))
                    } else {
                        tokens.push(lex_eq_or(&mut chars, LT::LessThanEquals, LT::LessThan))
                    }
                }
                '>' => {
                    if chars.peek() == Some(&'>') {
                        if chars.peek() == Some(&'>') {
                            chars.next();
                            chars.next();
                            tokens.push(lex_eq_or(
                                &mut chars,
                                LT::RightRightRightEquals,
                                LT::RightRightRight,
                            ))
                        } else {
                            chars.next();
                            tokens.push(lex_eq_or(&mut chars, LT::RightRightEquals, LT::RightRight))
                        }
                    } else {
                        tokens.push(lex_eq_or(
                            &mut chars,
                            LT::GreaterThanEquals,
                            LT::GreaterThan,
                        ))
                    }
                }
                '!' => {
                    // `!` may begin `!=`, `!is`, or stand alone.
                    let next = chars.peek();
                    if next == Some(&'=') {
                        chars.next();
                        chars.next();
                        tokens.push(LT::NotEquals);
                    } else if next == Some(&'i') && chars.peek() == Some(&'s') {
                        chars.next();
                        chars.next();
                        chars.next();
                        tokens.push(LT::NotIsKeyword);
                    } else {
                        chars.next();
                        tokens.push(LT::ExclamationMark);
                    }
                }
                '~' => tokens.push(lex_and_consume(&mut chars, LT::Tilde)),
                '@' => tokens.push(lex_and_consume(&mut chars, LT::AtSymbol)),
                ';' => tokens.push(lex_and_consume(&mut chars, LT::Semicolon)),
                ':' => tokens.push(lex_and_consume(&mut chars, LT::Colon)),
                '(' => tokens.push(lex_and_consume(&mut chars, LT::OpenBracket)),
                ')' => tokens.push(lex_and_consume(&mut chars, LT::CloseBracket)),
                '{' => tokens.push(lex_and_consume(&mut chars, LT::OpenCurlyBracket)),
                '}' => tokens.push(lex_and_consume(&mut chars, LT::CloseCurlyBracket)),
                '[' => tokens.push(lex_and_consume(&mut chars, LT::OpenBlockBracket)),
                ']' => tokens.push(lex_and_consume(&mut chars, LT::CloseBlockBracket)),
                '0'..='9' => tokens.push(lex_numeric(&mut chars)),
                'a'..='z' | 'A'..='Z' | '_' => tokens.push(lex_keyword_or_identifier(&mut chars)),
                '\'' => tokens.push(lex_string(&mut chars, &'\'', false)),
                // Three quotes in a row start a heredoc string.
                '"' if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') => {
                    tokens.push(lex_string(&mut chars, &'"', true))
                }
                '"' => tokens.push(lex_string(&mut chars, &'"', false)),
                // TODO: Definitely not unreachable. Log a proper error here.
                _ => unreachable!(),
            },
            None => {
                tokens.push(LT::EndOfFile);
                reading = false;
            }
        }
    }
    tokens
}

View File

@ -1,7 +1,6 @@
use crate::defines::{LiteralFloat, LiteralInt}; use crate::defines::{LiteralFloat, LiteralInt};
use crate::parsing::lex_tokens::LexToken; use crate::parsing::lexer::lex_tokens::TokenType;
use itertools::MultiPeek; use crate::parsing::lexer::StringWalker;
use std::str::Chars;
#[inline(always)] #[inline(always)]
fn get_decimal_value(c: char) -> Option<LiteralInt> { fn get_decimal_value(c: char) -> Option<LiteralInt> {
@ -21,7 +20,7 @@ fn get_decimal_value(c: char) -> Option<LiteralInt> {
} }
#[inline(always)] #[inline(always)]
fn lex_numeric_default(chars: &mut MultiPeek<Chars>) -> LexToken { fn lex_numeric_default(chars: &mut StringWalker) -> TokenType {
let mut int_value: LiteralInt = 0; let mut int_value: LiteralInt = 0;
let mut decimal_value: LiteralInt = 0; let mut decimal_value: LiteralInt = 0;
let mut exponent_value: LiteralInt = 0; let mut exponent_value: LiteralInt = 0;
@ -81,9 +80,9 @@ fn lex_numeric_default(chars: &mut MultiPeek<Chars>) -> LexToken {
if is_exponent { if is_exponent {
val *= exponent_value.pow(10) as LiteralFloat; val *= exponent_value.pow(10) as LiteralFloat;
} }
LexToken::FloatLiteral(val) TokenType::FloatLiteral(val)
} else { } else {
LexToken::IntegerLiteral(int_value) TokenType::IntegerLiteral(int_value)
} }
} }
@ -111,7 +110,7 @@ fn get_hexadecimal_value(c: char) -> Option<LiteralInt> {
} }
#[inline(always)] #[inline(always)]
fn lex_numeric_hexadecimal(chars: &mut MultiPeek<Chars>) -> LexToken { fn lex_numeric_hexadecimal(chars: &mut StringWalker) -> TokenType {
let mut int_value: LiteralInt = 0; let mut int_value: LiteralInt = 0;
let mut reading = true; let mut reading = true;
let mut n = chars.peek().cloned(); let mut n = chars.peek().cloned();
@ -132,7 +131,7 @@ fn lex_numeric_hexadecimal(chars: &mut MultiPeek<Chars>) -> LexToken {
} }
n = chars.peek().cloned(); n = chars.peek().cloned();
} }
LexToken::IntegerLiteral(int_value) TokenType::IntegerLiteral(int_value)
} }
#[inline(always)] #[inline(always)]
@ -151,7 +150,7 @@ fn get_octal_value(c: char) -> Option<LiteralInt> {
} }
#[inline(always)] #[inline(always)]
fn lex_numeric_octal(chars: &mut MultiPeek<Chars>) -> LexToken { fn lex_numeric_octal(chars: &mut StringWalker) -> TokenType {
let mut int_value: LiteralInt = 0; let mut int_value: LiteralInt = 0;
let mut reading = true; let mut reading = true;
let mut n = chars.peek().cloned(); let mut n = chars.peek().cloned();
@ -172,7 +171,7 @@ fn lex_numeric_octal(chars: &mut MultiPeek<Chars>) -> LexToken {
} }
n = chars.peek().cloned(); n = chars.peek().cloned();
} }
LexToken::IntegerLiteral(int_value) TokenType::IntegerLiteral(int_value)
} }
#[inline(always)] #[inline(always)]
@ -185,7 +184,7 @@ fn get_binary_value(c: char) -> Option<LiteralInt> {
} }
#[inline(always)] #[inline(always)]
fn lex_numeric_binary(chars: &mut MultiPeek<Chars>) -> LexToken { fn lex_numeric_binary(chars: &mut StringWalker) -> TokenType {
let mut int_value: LiteralInt = 0; let mut int_value: LiteralInt = 0;
let mut reading = true; let mut reading = true;
let mut n = chars.peek().cloned(); let mut n = chars.peek().cloned();
@ -206,37 +205,44 @@ fn lex_numeric_binary(chars: &mut MultiPeek<Chars>) -> LexToken {
} }
n = chars.peek().cloned(); n = chars.peek().cloned();
} }
LexToken::IntegerLiteral(int_value) TokenType::IntegerLiteral(int_value)
} }
#[inline(always)] #[inline(always)]
pub fn lex_numeric(chars: &mut MultiPeek<Chars>) -> LexToken { pub(super) fn lex_numeric(chars: &mut StringWalker, f: &mut dyn FnMut(TokenType, usize, usize)) {
chars.reset_peek(); chars.reset_peek();
if chars.peek() == Some(&'0') { let start_pos = chars.real_position;
let token_type = if chars.peek() == Some(&'0') {
match chars.peek() { match chars.peek() {
Some(&'D') | Some(&'d') => { Some(&'D') | Some(&'d') => {
chars.next(); chars.next();
chars.next(); chars.next();
return lex_numeric_default(chars); lex_numeric_default(chars)
} }
Some(&'X') | Some(&'x') => { Some(&'X') | Some(&'x') => {
chars.next(); chars.next();
chars.next(); chars.next();
return lex_numeric_hexadecimal(chars); lex_numeric_hexadecimal(chars)
} }
Some(&'O') | Some(&'o') => { Some(&'O') | Some(&'o') => {
chars.next(); chars.next();
chars.next(); chars.next();
return lex_numeric_octal(chars); lex_numeric_octal(chars)
} }
Some(&'B') | Some(&'b') => { Some(&'B') | Some(&'b') => {
chars.next(); chars.next();
chars.next(); chars.next();
return lex_numeric_binary(chars); lex_numeric_binary(chars)
}
_ => {}
}
} }
_ => {
chars.reset_peek(); chars.reset_peek();
lex_numeric_default(chars) lex_numeric_default(chars)
} }
}
} else {
chars.reset_peek();
lex_numeric_default(chars)
};
f(token_type, start_pos, chars.real_position);
}

View File

@ -1,7 +1,13 @@
use crate::defines::{LiteralFloat, LiteralInt}; use crate::defines::{LiteralFloat, LiteralInt};
use crate::span::Span;
pub struct LexToken {
pub token_type: TokenType,
pub span: Span
}
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug)]
pub enum LexToken { pub enum TokenType {
EndOfFile, EndOfFile,
WhiteSpace, WhiteSpace,
Identifier(String), Identifier(String),

View File

@ -0,0 +1,324 @@
use super::lex;
use crate::logger::messages::Message;
use crate::parsing::lexer::lex_tokens::TokenType;
// Generates a #[test] that lexes input `$b` and expects exactly one `$c`
// token spanning the whole input, followed by EndOfFile. The log callback is
// unreachable!(), so any emitted diagnostic fails the test.
macro_rules! lex_token_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b, &mut |_message, _span| {
                unreachable!();
            });
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0].token_type, $c);
            assert_eq!(tokens[0].span.start, 0);
            assert_eq!(tokens[0].span.end, $b.chars().count());
            assert_eq!(tokens[1].token_type, TokenType::EndOfFile);
        }
    };
}
// Generates a #[test] that lexes input `$b` and expects a single Identifier
// token whose text equals the whole input, followed by EndOfFile.
macro_rules! lex_identifier_test {
    ( $a: ident, $b: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b, &mut |_message, _span| {
                unreachable!();
            });
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0].token_type, TokenType::Identifier($b.to_string()));
            assert_eq!(tokens[0].span.start, 0);
            assert_eq!(tokens[0].span.end, $b.chars().count());
            assert_eq!(tokens[1].token_type, TokenType::EndOfFile);
        }
    };
}
// Generates a #[test] that lexes input `$b` and expects a single
// IntegerLiteral with value `$c`, followed by EndOfFile.
macro_rules! lex_integer_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b, &mut |_message, _span| {
                unreachable!();
            });
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0].token_type, TokenType::IntegerLiteral($c));
            assert_eq!(tokens[0].span.start, 0);
            assert_eq!(tokens[0].span.end, $b.chars().count());
            assert_eq!(tokens[1].token_type, TokenType::EndOfFile);
        }
    };
}
// Generates a #[test] that lexes input `$b` and expects a single FloatLiteral
// with value `$c` (compared with exact float equality), followed by EndOfFile.
macro_rules! lex_float_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b, &mut |_message, _span| {
                unreachable!();
            });
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0].token_type, TokenType::FloatLiteral($c));
            assert_eq!(tokens[0].span.start, 0);
            assert_eq!(tokens[0].span.end, $b.chars().count());
            assert_eq!(tokens[1].token_type, TokenType::EndOfFile);
        }
    };
}
// Generates a #[test] that lexes input `$b` and expects a single
// StringLiteral with the unescaped contents `$c`. The span covers the whole
// input including the quotes; the literal's text excludes them.
macro_rules! lex_string_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b, &mut |_message, _span| {
                unreachable!();
            });
            assert_eq!(tokens.len(), 2);
            assert_eq!(
                tokens[0].token_type,
                TokenType::StringLiteral($c.to_string())
            );
            assert_eq!(tokens[0].span.start, 0);
            assert_eq!(tokens[0].span.end, $b.chars().count());
            assert_eq!(tokens[1].token_type, TokenType::EndOfFile);
        }
    };
}
// Single-token inputs: whitespace and operators.
lex_token_test!(lex_space, " ", TokenType::WhiteSpace);
lex_token_test!(lex_tab, "\t", TokenType::WhiteSpace);
lex_token_test!(lex_return_line, "\r", TokenType::WhiteSpace);
lex_token_test!(lex_newline, "\n", TokenType::WhiteSpace);
lex_token_test!(lex_equals, "=", TokenType::Equals);
lex_token_test!(lex_equals_equals, "==", TokenType::EqualsEquals);
lex_token_test!(lex_plus, "+", TokenType::Plus);
lex_token_test!(lex_plus_plus, "++", TokenType::PlusPlus);
lex_token_test!(lex_plus_equals, "+=", TokenType::PlusEquals);
lex_token_test!(lex_minus, "-", TokenType::Minus);
lex_token_test!(lex_minus_minus, "--", TokenType::MinusMinus);
lex_token_test!(lex_minus_equals, "-=", TokenType::MinusEquals);
lex_token_test!(lex_star, "*", TokenType::Star);
lex_token_test!(lex_star_equals, "*=", TokenType::StarEquals);
lex_token_test!(lex_star_star, "**", TokenType::StarStar);
lex_token_test!(lex_star_star_equals, "**=", TokenType::StarStarEquals);
lex_token_test!(lex_slash, "/", TokenType::Slash);
lex_token_test!(lex_slash_equals, "/=", TokenType::SlashEquals);
lex_token_test!(lex_percent, "%", TokenType::Percent);
lex_token_test!(lex_percent_equals, "%=", TokenType::PercentEquals);
lex_token_test!(lex_exclamation_mark, "!", TokenType::ExclamationMark);
lex_token_test!(lex_not_equals, "!=", TokenType::NotEquals);
lex_token_test!(lex_not_is_keyword, "!is", TokenType::NotIsKeyword);
lex_token_test!(lex_vert_line, "|", TokenType::VerticalLine);
lex_token_test!(lex_vert_line_equals, "|=", TokenType::LineEquals);
lex_token_test!(lex_line_line, "||", TokenType::LineLine);
lex_token_test!(lex_ampersand, "&", TokenType::Ampersand);
lex_token_test!(lex_ampersand_equals, "&=", TokenType::AmpersandEquals);
lex_token_test!(lex_ampersand_ampersand, "&&", TokenType::AmpersandAmpersand);
lex_token_test!(lex_less_than, "<", TokenType::LessThan);
lex_token_test!(lex_less_than_equals, "<=", TokenType::LessThanEquals);
lex_token_test!(lex_left_left, "<<", TokenType::LeftLeft);
lex_token_test!(lex_left_left_equals, "<<=", TokenType::LeftLeftEquals);
lex_token_test!(lex_greater_than, ">", TokenType::GreaterThan);
lex_token_test!(lex_greater_than_equals, ">=", TokenType::GreaterThanEquals);
lex_token_test!(lex_right_right, ">>", TokenType::RightRight);
lex_token_test!(lex_right_right_equals, ">>=", TokenType::RightRightEquals);
lex_token_test!(lex_right_right_right, ">>>", TokenType::RightRightRight);
lex_token_test!(
    lex_right_right_right_equals,
    ">>>=",
    TokenType::RightRightRightEquals
);
lex_token_test!(lex_tilde, "~", TokenType::Tilde);
lex_token_test!(lex_at_symbol, "@", TokenType::AtSymbol);
// Keywords.
lex_token_test!(lex_and_keyword, "and", TokenType::AndKeyword);
lex_token_test!(lex_abstract_keyword, "abstract", TokenType::AbstractKeyword);
lex_token_test!(lex_auto_keyword, "auto", TokenType::AutoKeyword);
lex_token_test!(lex_bool_keyword, "bool", TokenType::BoolKeyword);
lex_token_test!(lex_break_keyword, "break", TokenType::BreakKeyword);
lex_token_test!(lex_case_keyword, "case", TokenType::CaseKeyword);
lex_token_test!(lex_cast_keyword, "cast", TokenType::CastKeyword);
lex_token_test!(lex_catch_keyword, "catch", TokenType::CatchKeyword);
lex_token_test!(lex_class_keyword, "class", TokenType::ClassKeyword);
lex_token_test!(lex_const_keyword, "const", TokenType::ConstKeyword);
lex_token_test!(lex_continue_keyword, "continue", TokenType::ContinueKeyword);
lex_token_test!(lex_default_keyword, "default", TokenType::DefaultKeyword);
lex_token_test!(lex_do_keyword, "do", TokenType::DoKeyword);
lex_token_test!(lex_double_keyword, "double", TokenType::DoubleKeyword);
lex_token_test!(lex_else_keyword, "else", TokenType::ElseKeyword);
lex_token_test!(lex_enum_keyword, "enum", TokenType::EnumKeyword);
lex_token_test!(lex_explicit_keyword, "explicit", TokenType::ExplicitKeyword);
lex_token_test!(lex_external_keyword, "external", TokenType::ExternalKeyword);
lex_token_test!(lex_false_keyword, "false", TokenType::FalseKeyword);
lex_token_test!(lex_final_keyword, "final", TokenType::FinalKeyword);
lex_token_test!(lex_float_keyword, "float", TokenType::FloatKeyword);
lex_token_test!(lex_for_keyword, "for", TokenType::ForKeyword);
lex_token_test!(lex_from_keyword, "from", TokenType::FromKeyword);
lex_token_test!(lex_funcdef_keyword, "funcdef", TokenType::FuncDefKeyword);
lex_token_test!(lex_function_keyword, "function", TokenType::FunctionKeyword);
lex_token_test!(lex_get_keyword, "get", TokenType::GetKeyword);
lex_token_test!(lex_if_keyword, "if", TokenType::IfKeyword);
lex_token_test!(lex_import_keyword, "import", TokenType::ImportKeyword);
lex_token_test!(lex_in_keyword, "in", TokenType::InKeyword);
lex_token_test!(lex_inout_keyword, "inout", TokenType::InOutKeyword);
lex_token_test!(lex_int_keyword, "int", TokenType::IntKeyword);
lex_token_test!(
    lex_interface_keyword,
    "interface",
    TokenType::InterfaceKeyword
);
lex_token_test!(lex_int8_keyword, "int8", TokenType::Int8Keyword);
lex_token_test!(lex_int16_keyword, "int16", TokenType::Int16Keyword);
lex_token_test!(lex_int32_keyword, "int32", TokenType::Int32Keyword);
lex_token_test!(lex_int64_keyword, "int64", TokenType::Int64Keyword);
lex_token_test!(lex_is_keyword, "is", TokenType::IsKeyword);
lex_token_test!(lex_mixin_keyword, "mixin", TokenType::MixinKeyword);
lex_token_test!(
    lex_namespace_keyword,
    "namespace",
    TokenType::NamespaceKeyword
);
lex_token_test!(lex_not_keyword, "not", TokenType::NotKeyword);
lex_token_test!(lex_null_keyword, "null", TokenType::NullKeyword);
lex_token_test!(lex_or_keyword, "or", TokenType::OrKeyword);
lex_token_test!(lex_out_keyword, "out", TokenType::OutKeyword);
lex_token_test!(lex_override_keyword, "override", TokenType::OverrideKeyword);
lex_token_test!(lex_private_keyword, "private", TokenType::PrivateKeyword);
lex_token_test!(lex_property_keyword, "property", TokenType::PropertyKeyword);
lex_token_test!(
    lex_protected_keyword,
    "protected",
    TokenType::ProtectedKeyword
);
lex_token_test!(lex_return_keyword, "return", TokenType::ReturnKeyword);
lex_token_test!(lex_set_keyword, "set", TokenType::SetKeyword);
lex_token_test!(lex_shared_keyword, "shared", TokenType::SharedKeyword);
lex_token_test!(lex_super_keyword, "super", TokenType::SuperKeyword);
lex_token_test!(lex_switch_keyword, "switch", TokenType::SwitchKeyword);
lex_token_test!(lex_this_keyword, "this", TokenType::ThisKeyword);
lex_token_test!(lex_true_keyword, "true", TokenType::TrueKeyword);
lex_token_test!(lex_try_keyword, "try", TokenType::TryKeyword);
lex_token_test!(lex_typedef_keyword, "typedef", TokenType::TypeDefKeyword);
lex_token_test!(lex_uint_keyword, "uint", TokenType::UintKeyword);
lex_token_test!(lex_uint8_keyword, "uint8", TokenType::Uint8Keyword);
lex_token_test!(lex_uint16_keyword, "uint16", TokenType::Uint16Keyword);
lex_token_test!(lex_uint32_keyword, "uint32", TokenType::Uint32Keyword);
// NOTE(review): no test exercises "uint64" / TokenType::Uint64Keyword — consider adding one.
lex_token_test!(lex_void_keyword, "void", TokenType::VoidKeyword);
lex_token_test!(lex_while_keyword, "while", TokenType::WhileKeyword);
lex_token_test!(lex_xor_keyword, "xor", TokenType::XorKeyword);
// Identifiers.
lex_identifier_test!(lex_basic_identifier_foo, "foo");
lex_identifier_test!(lex_basic_identifier_foobar, "foobar");
// Integer literals in every supported base (default, 0d, 0x, 0o, 0b) and with
// underscore separators.
lex_integer_test!(lex_zero, "0", 0);
lex_integer_test!(lex_one_two_three_four, "1234", 1234);
lex_integer_test!(lex_specific_one_two_three_four, "0d1234", 1234);
lex_integer_test!(lex_decimal_with_underline, "123_456", 123456);
lex_integer_test!(lex_specific_decimal_with_underline, "0D123_456", 123456);
lex_integer_test!(lex_hexadecimal_0f, "0X0F", 15);
lex_integer_test!(lex_hexadecimal_ff, "0xff", 255);
lex_integer_test!(lex_hexadecimal_ff_ff, "0xff_ff", 65535);
lex_integer_test!(lex_octal_112, "0o112", 74);
lex_integer_test!(lex_binary_1110, "0b1110", 14);
lex_integer_test!(lex_binary_01110, "0b01110", 14);
// Float literals, including an exponent form.
lex_float_test!(lex_zero_float, "0.0", 0.0);
lex_float_test!(lex_half, "0.5", 0.5);
lex_float_test!(lex_point_0_5, "0.05", 0.05);
lex_float_test!(lex_half_with_exponent, "0.5e10", 0.5e10);
// String literals: both quote styles, escapes, and heredocs.
lex_string_test!(lex_simple_string, "\"foo\"", "foo");
lex_string_test!(lex_simple_string_single_quote, "\'foo\'", "foo");
lex_string_test!(lex_string_with_escape, "\"fo\\\"o\"", "fo\"o");
lex_string_test!(lex_string_with_new_line, "\"fo\\no\"", "fo\no");
lex_string_test!(lex_heredoc_string, "\"\"\"foo\"\"\"", "foo");
lex_string_test!(lex_heredoc_string_with_quote, "\"\"\"fo\"o\"\"\"", "fo\"o");
/// Two identifiers separated by whitespace yield identifier / whitespace /
/// identifier tokens with byte-accurate spans, plus the trailing EndOfFile.
#[test]
fn lex_two_identifier() {
    let tokens = lex("foo bar", &mut |_message, _span| {});
    assert_eq!(tokens.len(), 4);
    assert_eq!(
        tokens[0].token_type,
        TokenType::Identifier("foo".to_string())
    );
    assert_eq!(tokens[0].span.start, 0);
    assert_eq!(tokens[0].span.end, 3);
    assert_eq!(tokens[1].token_type, TokenType::WhiteSpace);
    assert_eq!(
        tokens[2].token_type,
        TokenType::Identifier("bar".to_string())
    );
    assert_eq!(tokens[2].span.start, 4);
    assert_eq!(tokens[2].span.end, 7);
    assert_eq!(tokens[3].token_type, TokenType::EndOfFile);
}
/// The three-character `!is` token must get a single 2..5 span when embedded
/// between identifiers; every token's span is checked, including the
/// zero-width EndOfFile at position 7.
#[test]
fn lex_multiple_tokens_with_not_is() {
    let tokens = lex("a !is b", &mut |_message, _span| {});
    assert_eq!(tokens.len(), 6);
    assert_eq!(tokens[0].token_type, TokenType::Identifier("a".to_string()));
    assert_eq!(tokens[0].span.start, 0);
    assert_eq!(tokens[0].span.end, 1);
    assert_eq!(tokens[1].token_type, TokenType::WhiteSpace);
    assert_eq!(tokens[1].span.start, 1);
    assert_eq!(tokens[1].span.end, 2);
    assert_eq!(tokens[2].token_type, TokenType::NotIsKeyword);
    assert_eq!(tokens[2].span.start, 2);
    assert_eq!(tokens[2].span.end, 5);
    assert_eq!(tokens[3].token_type, TokenType::WhiteSpace);
    assert_eq!(tokens[3].span.start, 5);
    assert_eq!(tokens[3].span.end, 6);
    assert_eq!(tokens[4].token_type, TokenType::Identifier("b".to_string()));
    assert_eq!(tokens[4].span.start, 6);
    assert_eq!(tokens[4].span.end, 7);
    assert_eq!(tokens[5].token_type, TokenType::EndOfFile);
    assert_eq!(tokens[5].span.start, 7);
    assert_eq!(tokens[5].span.end, 7);
}
/// A non-lexable character (backspace, 0x08) at offset 0 must be reported as
/// UnexpectedCharacter with span 0..1; `reached` guards against the callback
/// being silently skipped.
#[test]
fn lex_invalid_character_at_first_position() {
    let mut reached = false;
    lex("\x08", &mut |message, span| {
        reached = true;
        assert_matches!(message, Message::UnexpectedCharacter('\x08'));
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 1);
    });
    assert!(reached);
}
/// The same invalid character after two spaces must be reported with a span
/// shifted to 2..3, verifying the diagnostic position tracks the input.
#[test]
fn lex_invalid_character_at_other_position() {
    let mut reached = false;
    lex("  \x08", &mut |message, span| {
        reached = true;
        assert_matches!(message, Message::UnexpectedCharacter('\x08'));
        assert_eq!(span.start, 2);
        assert_eq!(span.end, 3);
    });
    assert!(reached);
}
/// A string literal that hits end-of-input before its closing quote must be
/// reported as UnclosedStringLiteral.
///
/// NOTE(review): the asserted span (5..6) exceeds the 2-character input
/// "\" "; it presumably reflects StringWalker's peek_position after the scan
/// loop rather than a real source offset — confirm this is intended.
#[test]
fn lex_unclosed_string_literal() {
    let mut reached = false;
    lex("\" ", &mut |message, span| {
        reached = true;
        assert_matches!(message, Message::UnclosedStringLiteral);
        assert_eq!(span.start, 5);
        assert_eq!(span.end, 6);
    });
    assert!(reached);
}

462
src/parsing/lexer/mod.rs Normal file
View File

@ -0,0 +1,462 @@
use crate::logger::messages::Message;
use crate::parsing::lexer::lex_tokens::LexToken;
use crate::span::Span;
use lex_numerical::lex_numeric;
use lex_tokens::TokenType;
use string_walker::StringWalker;
mod lex_numerical;
pub mod lex_tokens;
#[cfg(test)]
mod lexer_tests;
mod string_walker;
/// Consumes exactly one character and emits `eq` through `f`.
///
/// `real_position` has already advanced past the character when `f` is called,
/// so the span start is `real_position - 1`.
#[inline(always)]
fn lex_and_consume(
    chars: &mut StringWalker,
    eq: TokenType,
    f: &mut dyn FnMut(TokenType, usize, usize),
) {
    chars.next();
    f(eq, chars.real_position - 1, chars.real_position)
}
/// Lexes a one-character operator that may be followed by `=`: emits `eq` for
/// the compound form (e.g. `+=`) and `or` for the bare operator, spanning from
/// `start_pos` to the current position.
#[inline(always)]
fn lex_eq_or(
    chars: &mut StringWalker,
    eq: TokenType,
    or: TokenType,
    start_pos: usize,
    f: &mut dyn FnMut(TokenType, usize, usize),
) {
    // Consume the character that selected this lexer.
    chars.next();
    let token = match chars.peek() {
        Some('=') => {
            // Compound form: consume the '=' as well.
            chars.next();
            eq
        }
        _ => or,
    };
    f(token, start_pos, chars.real_position);
}
/// Lexes a one-character operator that may be followed by `=` (emitting `eq`),
/// by a repetition of itself `v` (emitting `rep`), or by anything else /
/// end-of-input (emitting `or`). E.g. for `+`: `+=` / `++` / `+`.
#[inline(always)]
fn lex_eq_rep_or(
    chars: &mut StringWalker,
    v: char,
    eq: TokenType,
    rep: TokenType,
    or: TokenType,
    f: &mut dyn FnMut(TokenType, usize, usize),
) {
    let start_pos = chars.real_position;
    // Consume the character that selected this lexer.
    chars.next();
    let token = match chars.peek() {
        // Doubled character (e.g. `++`) is checked before `=`.
        Some(&next) if next == v => {
            chars.next();
            rep
        }
        Some(&'=') => {
            chars.next();
            eq
        }
        _ => or,
    };
    f(token, start_pos, chars.real_position);
}
type TT = TokenType;
/// Reads an identifier starting at the current character and emits either the
/// matching keyword token or `Identifier` through `f`.
///
/// The first character was already validated by the caller, so peeking starts
/// at the second character and `length` starts at 1.
///
/// Bug fix: the range patterns were exclusive (`'a'..'z' | 'A'..'Z' | '0'..'9'`),
/// which wrongly rejected 'z', 'Z' and '9' and truncated identifiers containing
/// them. They are now inclusive (`..=`).
fn lex_keyword_or_identifier(chars: &mut StringWalker, f: &mut dyn FnMut(TokenType, usize, usize)) {
    let mut reading = true;
    let mut length = 1;
    let start_pos = chars.real_position;
    while reading {
        match chars.peek() {
            Some(c) => match c {
                'a'..='z' | 'A'..='Z' | '_' | '0'..='9' => {
                    length += 1;
                }
                _ => {
                    reading = false;
                }
            },
            None => {
                reading = false;
            }
        };
    }
    // Re-read the counted characters into a string for the keyword lookup.
    chars.reset_peek();
    let c: String = chars.take(length);
    let token_type = match c.as_str() {
        "and" => TT::AndKeyword,
        "abstract" => TT::AbstractKeyword,
        "auto" => TT::AutoKeyword,
        "bool" => TT::BoolKeyword,
        "break" => TT::BreakKeyword,
        "case" => TT::CaseKeyword,
        "cast" => TT::CastKeyword,
        "catch" => TT::CatchKeyword,
        "class" => TT::ClassKeyword,
        "const" => TT::ConstKeyword,
        "continue" => TT::ContinueKeyword,
        "default" => TT::DefaultKeyword,
        "do" => TT::DoKeyword,
        "double" => TT::DoubleKeyword,
        "else" => TT::ElseKeyword,
        "enum" => TT::EnumKeyword,
        "explicit" => TT::ExplicitKeyword,
        "external" => TT::ExternalKeyword,
        "false" => TT::FalseKeyword,
        "final" => TT::FinalKeyword,
        "float" => TT::FloatKeyword,
        "for" => TT::ForKeyword,
        "from" => TT::FromKeyword,
        "funcdef" => TT::FuncDefKeyword,
        "function" => TT::FunctionKeyword,
        "get" => TT::GetKeyword,
        "if" => TT::IfKeyword,
        "import" => TT::ImportKeyword,
        "in" => TT::InKeyword,
        "inout" => TT::InOutKeyword,
        "int" => TT::IntKeyword,
        "interface" => TT::InterfaceKeyword,
        "int8" => TT::Int8Keyword,
        "int16" => TT::Int16Keyword,
        "int32" => TT::Int32Keyword,
        "int64" => TT::Int64Keyword,
        "is" => TT::IsKeyword,
        "mixin" => TT::MixinKeyword,
        "namespace" => TT::NamespaceKeyword,
        "not" => TT::NotKeyword,
        "null" => TT::NullKeyword,
        "or" => TT::OrKeyword,
        "out" => TT::OutKeyword,
        "override" => TT::OverrideKeyword,
        "private" => TT::PrivateKeyword,
        "property" => TT::PropertyKeyword,
        "protected" => TT::ProtectedKeyword,
        "return" => TT::ReturnKeyword,
        "set" => TT::SetKeyword,
        "shared" => TT::SharedKeyword,
        "super" => TT::SuperKeyword,
        "switch" => TT::SwitchKeyword,
        "this" => TT::ThisKeyword,
        "true" => TT::TrueKeyword,
        "try" => TT::TryKeyword,
        "typedef" => TT::TypeDefKeyword,
        "uint" => TT::UintKeyword,
        "uint8" => TT::Uint8Keyword,
        "uint16" => TT::Uint16Keyword,
        "uint32" => TT::Uint32Keyword,
        "uint64" => TT::Uint64Keyword,
        "void" => TT::VoidKeyword,
        "while" => TT::WhileKeyword,
        "xor" => TT::XorKeyword,
        _ => TT::Identifier(c),
    };
    f(token_type, start_pos, chars.real_position);
}
/// Lexes a string literal and emits it through `f`. `opening_char` is the
/// quote that opened it; when `heredoc` is true the literal is triple-quoted
/// (`"""…"""`). Unterminated literals are reported through `log`.
///
/// NOTE(review): when a lone `opening_char` appears inside a heredoc, both the
/// inner branch and the unconditional fall-through below it bump
/// `length`/`string_length`; this appears to offset the extra `peek()` calls
/// made by the closing-quote check — confirm before restructuring the loop.
fn lex_string(
    chars: &mut StringWalker,
    opening_char: &char,
    heredoc: bool,
    log: &mut dyn FnMut(Message, Span),
    f: &mut dyn FnMut(TokenType, usize, usize),
) {
    let start_pos = chars.real_position;
    // Consume the opening quote (three quotes for a heredoc).
    chars.next();
    if heredoc {
        chars.next();
        chars.next();
    }
    let mut length: i32 = 0;
    let mut string_length = 0;
    let mut last_was_control = false;
    // We loop twice here. In the first loop we get the number of characters to read, the number of
    // characters the string should be, and whether it's valid. This reduces the amount of allocations
    // we need to do to read a string.
    loop {
        let p = chars.peek();
        match p {
            None => {
                // EOF before the closing quote: report and continue with what
                // was counted so far.
                // NOTE(review): the span is derived from peek_position, which
                // can exceed the input's real indices — confirm the intended
                // reporting position.
                log(
                    Message::UnclosedStringLiteral,
                    Span::new(chars.peek_position - 1, chars.peek_position),
                );
                break;
            }
            // A backslash starts an escape sequence; it consumes one source
            // character but contributes none to the string length.
            Some(&'\\') if !last_was_control => {
                last_was_control = true;
                length += 1;
            }
            Some(c) => {
                if c == opening_char && !last_was_control {
                    if heredoc {
                        // Heredocs only close on a full `"""`; peek two further.
                        if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') {
                            break;
                        } else {
                            length += 1;
                            string_length += 1;
                        }
                    } else {
                        break;
                    }
                }
                length += 1;
                string_length += 1;
                last_was_control = false;
            }
        }
    }
    // Second pass: re-read `length` source characters, resolving escapes.
    chars.reset_peek();
    let mut s: String = String::with_capacity(string_length);
    for _ in 0..length {
        let p = chars.next().unwrap();
        match p {
            '\\' => {
                if last_was_control {
                    s.push('\\');
                } else {
                    last_was_control = true;
                    continue;
                }
            }
            '0' if last_was_control => s.push('\0'),
            'n' if last_was_control => s.push('\n'),
            'r' if last_was_control => s.push('\r'),
            't' if last_was_control => s.push('\t'),
            _ => s.push(p),
        };
        last_was_control = false;
    }
    // The first pass promised exactly this many characters.
    assert_eq!(s.len(), string_length);
    // Consume the closing quote (three quotes for a heredoc).
    chars.reset_peek();
    chars.next();
    if heredoc {
        chars.next();
        chars.next();
    }
    f(TT::StringLiteral(s), start_pos, chars.real_position);
}
/// Lexes the source string `s` into a flat list of span-carrying tokens,
/// reporting diagnostics through `log`. The returned list always ends with an
/// `EndOfFile` token.
///
/// Fix: the crate enables `exclusive_range_pattern`, so `'0'..'9'` matched only
/// `'0'` through `'8'` and `'a'..'z' | 'A'..'Z'` excluded `'z'`/`'Z'` -- numbers
/// starting with `9` and identifiers starting with `z`/`Z` were reported as
/// unexpected characters. The patterns now use inclusive `..=` ranges.
pub fn lex(s: &str, log: &mut dyn FnMut(Message, Span)) -> Vec<LexToken> {
    let mut tokens: Vec<LexToken> = Vec::new();
    let mut chars = StringWalker::create(s);
    let mut reading = true;
    // Shared sink that wraps each recognised token type with its source span.
    let mut add_token = |token_type: TokenType, start: usize, end: usize| {
        tokens.push(LexToken {
            token_type,
            span: Span::new(start, end),
        })
    };
    while reading {
        let p = chars.peek().cloned();
        match p {
            Some(c) => match c {
                // Whitespace is kept as a token so the parser can skip it itself.
                ' ' | '\t' | '\r' | '\n' => {
                    chars.next();
                    add_token(TT::WhiteSpace, chars.real_position - 1, chars.real_position);
                }
                // `=` / `==`
                '=' => {
                    let start_pos = chars.real_position;
                    lex_eq_or(
                        &mut chars,
                        TT::EqualsEquals,
                        TT::Equals,
                        start_pos,
                        &mut add_token,
                    )
                }
                // `+` / `+=` / `++`
                '+' => lex_eq_rep_or(
                    &mut chars,
                    '+',
                    TT::PlusEquals,
                    TT::PlusPlus,
                    TT::Plus,
                    &mut add_token,
                ),
                // `-` / `-=` / `--`
                '-' => lex_eq_rep_or(
                    &mut chars,
                    '-',
                    TT::MinusEquals,
                    TT::MinusMinus,
                    TT::Minus,
                    &mut add_token,
                ),
                // `*` / `*=` / `**` / `**=`
                '*' => {
                    let start_pos = chars.real_position;
                    if chars.peek() == Some(&'*') {
                        chars.next();
                        lex_eq_or(
                            &mut chars,
                            TT::StarStarEquals,
                            TT::StarStar,
                            start_pos,
                            &mut add_token,
                        )
                    } else {
                        lex_eq_or(
                            &mut chars,
                            TT::StarEquals,
                            TT::Star,
                            start_pos,
                            &mut add_token,
                        )
                    }
                }
                // `/` / `/=`
                '/' => {
                    let start_pos = chars.real_position;
                    lex_eq_or(
                        &mut chars,
                        TT::SlashEquals,
                        TT::Slash,
                        start_pos,
                        &mut add_token,
                    );
                }
                // `%` / `%=`
                '%' => {
                    let start_pos = chars.real_position;
                    lex_eq_or(
                        &mut chars,
                        TT::PercentEquals,
                        TT::Percent,
                        start_pos,
                        &mut add_token,
                    );
                }
                // `|` / `|=` / `||`
                '|' => lex_eq_rep_or(
                    &mut chars,
                    '|',
                    TT::LineEquals,
                    TT::LineLine,
                    TT::VerticalLine,
                    &mut add_token,
                ),
                // `&` / `&=` / `&&`
                '&' => lex_eq_rep_or(
                    &mut chars,
                    '&',
                    TT::AmpersandEquals,
                    TT::AmpersandAmpersand,
                    TT::Ampersand,
                    &mut add_token,
                ),
                // `^` / `^=` / `^^`
                '^' => lex_eq_rep_or(
                    &mut chars,
                    '^',
                    TT::RoofEquals,
                    TT::RoofRoof,
                    TT::Roof,
                    &mut add_token,
                ),
                // `<` / `<=` / `<<` / `<<=`
                '<' => {
                    let start_pos = chars.real_position;
                    if chars.peek() == Some(&'<') {
                        chars.next();
                        lex_eq_or(
                            &mut chars,
                            TT::LeftLeftEquals,
                            TT::LeftLeft,
                            start_pos,
                            &mut add_token,
                        )
                    } else {
                        lex_eq_or(
                            &mut chars,
                            TT::LessThanEquals,
                            TT::LessThan,
                            start_pos,
                            &mut add_token,
                        )
                    }
                }
                // `>` / `>=` / `>>` / `>>=` / `>>>` / `>>>=`
                '>' => {
                    let start_pos = chars.real_position;
                    if chars.peek() == Some(&'>') {
                        // Second peek looks one further ahead (MultiPeek cursor).
                        if chars.peek() == Some(&'>') {
                            chars.next();
                            chars.next();
                            lex_eq_or(
                                &mut chars,
                                TT::RightRightRightEquals,
                                TT::RightRightRight,
                                start_pos,
                                &mut add_token,
                            )
                        } else {
                            chars.next();
                            lex_eq_or(
                                &mut chars,
                                TT::RightRightEquals,
                                TT::RightRight,
                                start_pos,
                                &mut add_token,
                            )
                        }
                    } else {
                        lex_eq_or(
                            &mut chars,
                            TT::GreaterThanEquals,
                            TT::GreaterThan,
                            start_pos,
                            &mut add_token,
                        )
                    }
                }
                // `!` / `!=` / `!is`
                '!' => {
                    let start_pos = chars.real_position;
                    let next = chars.peek();
                    if next == Some(&'=') {
                        chars.next();
                        chars.next();
                        add_token(TT::NotEquals, start_pos, chars.real_position);
                    } else if next == Some(&'i') && chars.peek() == Some(&'s') {
                        chars.next();
                        chars.next();
                        chars.next();
                        add_token(TT::NotIsKeyword, start_pos, chars.real_position);
                    } else {
                        chars.next();
                        add_token(TT::ExclamationMark, start_pos, chars.real_position);
                    }
                }
                // Single-character tokens.
                '~' => lex_and_consume(&mut chars, TT::Tilde, &mut add_token),
                '@' => lex_and_consume(&mut chars, TT::AtSymbol, &mut add_token),
                ';' => lex_and_consume(&mut chars, TT::Semicolon, &mut add_token),
                ':' => lex_and_consume(&mut chars, TT::Colon, &mut add_token),
                '(' => lex_and_consume(&mut chars, TT::OpenBracket, &mut add_token),
                ')' => lex_and_consume(&mut chars, TT::CloseBracket, &mut add_token),
                '{' => lex_and_consume(&mut chars, TT::OpenCurlyBracket, &mut add_token),
                '}' => lex_and_consume(&mut chars, TT::CloseCurlyBracket, &mut add_token),
                '[' => lex_and_consume(&mut chars, TT::OpenBlockBracket, &mut add_token),
                ']' => lex_and_consume(&mut chars, TT::CloseBlockBracket, &mut add_token),
                // Inclusive ranges: `..` would drop '9', 'z' and 'Z'.
                '0'..='9' => lex_numeric(&mut chars, &mut add_token),
                'a'..='z' | 'A'..='Z' | '_' => lex_keyword_or_identifier(&mut chars, &mut add_token),
                '\'' => lex_string(&mut chars, &'\'', false, log, &mut add_token),
                // Three double quotes in a row open a heredoc string.
                '"' if chars.peek() == Some(&'\"') && chars.peek() == Some(&'\"') => {
                    lex_string(&mut chars, &'"', true, log, &mut add_token)
                }
                '"' => lex_string(&mut chars, &'"', false, log, &mut add_token),
                // Anything else is reported, not tokenized; the walker does not
                // advance here (the outer peek moves the cursor onward).
                _ => log(
                    Message::UnexpectedCharacter(c),
                    Span::new(chars.real_position, chars.real_position + 1),
                ),
            },
            None => {
                // Input exhausted: emit the terminating token and stop.
                add_token(TT::EndOfFile, chars.real_position, chars.real_position);
                reading = false;
            }
        }
    }
    tokens
}

View File

@ -0,0 +1,40 @@
use itertools::{Itertools, MultiPeek};
use std::str::Chars;
/// A character reader over a source string that tracks both the position of
/// characters actually consumed (`real_position`) and the position of the
/// speculative lookahead cursor (`peek_position`).
pub(super) struct StringWalker<'a> {
    chars: MultiPeek<Chars<'a>>,
    // Number of characters consumed so far; index of the next `next()` result.
    pub(crate) real_position: usize,
    // One past the character returned by the most recent `peek()`; snaps back
    // to `real_position` on `next()`, `take()` or `reset_peek()`.
    pub(crate) peek_position: usize,
}
impl<'a> StringWalker<'a> {
    /// Creates a walker positioned at the start of `s`.
    pub fn create(s: &str) -> StringWalker {
        StringWalker {
            chars: s.chars().multipeek(),
            real_position: 0,
            peek_position: 0,
        }
    }
    /// Returns the next unpeeked character without consuming it. Successive
    /// calls look further and further ahead (MultiPeek semantics).
    /// NOTE(review): `peek_position` is incremented even when the walker is at
    /// the end of input and `None` is returned -- confirm that is intended.
    pub fn peek(&mut self) -> Option<&char> {
        self.peek_position += 1;
        self.chars.peek()
    }
    /// Consumes and returns the next character, collapsing the peek cursor.
    pub fn next(&mut self) -> Option<char> {
        self.real_position += 1;
        self.peek_position = self.real_position;
        self.chars.next()
    }
    /// Rewinds the lookahead cursor back to the last consumed position.
    pub fn reset_peek(&mut self) {
        self.peek_position = self.real_position;
        self.chars.reset_peek()
    }
    /// Consumes up to `length` characters and returns them as a `String`.
    /// NOTE(review): `real_position` advances by `length` even when fewer
    /// characters remain in the input -- confirm callers never overshoot.
    pub fn take(&mut self, length: usize) -> String {
        self.real_position += length;
        self.peek_position = self.real_position;
        self.chars.by_ref().take(length).collect()
    }
}

View File

@ -1,241 +0,0 @@
use super::lex_tokens::LexToken;
use super::lexer::lex;
// NOTE(review): these macros call `lex` with a single argument and compare
// tokens directly against `LexToken` variants -- that matches the pre-span
// lexer API, not the `lex(s, log)` / span-carrying tokens seen above.

// Asserts that lexing `$b` yields exactly the token `$c` followed by EndOfFile.
macro_rules! lex_token_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b);
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0], $c);
            assert_eq!(tokens[1], LexToken::EndOfFile);
        }
    };
}
// Asserts that lexing `$b` yields an Identifier token with `$b` as its text.
macro_rules! lex_identifier_test {
    ( $a: ident, $b: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b);
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0], LexToken::Identifier($b.to_string()));
            assert_eq!(tokens[1], LexToken::EndOfFile);
        }
    };
}
// Asserts that lexing `$b` yields an IntegerLiteral with value `$c`.
macro_rules! lex_integer_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b);
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0], LexToken::IntegerLiteral($c));
            assert_eq!(tokens[1], LexToken::EndOfFile);
        }
    };
}
// Asserts that lexing `$b` yields a FloatLiteral with value `$c`.
macro_rules! lex_float_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b);
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0], LexToken::FloatLiteral($c));
            assert_eq!(tokens[1], LexToken::EndOfFile);
        }
    };
}
// Asserts that lexing `$b` yields a StringLiteral with decoded contents `$c`.
macro_rules! lex_string_test {
    ( $a: ident, $b: expr, $c: expr) => {
        #[test]
        fn $a() {
            let tokens = lex($b);
            assert_eq!(tokens.len(), 2);
            assert_eq!(tokens[0], LexToken::StringLiteral($c.to_string()));
            assert_eq!(tokens[1], LexToken::EndOfFile);
        }
    };
}
// -- Whitespace ---------------------------------------------------------------
lex_token_test!(lex_space, " ", LexToken::WhiteSpace);
lex_token_test!(lex_tab, "\t", LexToken::WhiteSpace);
lex_token_test!(lex_return_line, "\r", LexToken::WhiteSpace);
lex_token_test!(lex_newline, "\n", LexToken::WhiteSpace);
// -- Operators ----------------------------------------------------------------
lex_token_test!(lex_equals, "=", LexToken::Equals);
lex_token_test!(lex_equals_equals, "==", LexToken::EqualsEquals);
lex_token_test!(lex_plus, "+", LexToken::Plus);
lex_token_test!(lex_plus_plus, "++", LexToken::PlusPlus);
lex_token_test!(lex_plus_equals, "+=", LexToken::PlusEquals);
lex_token_test!(lex_minus, "-", LexToken::Minus);
lex_token_test!(lex_minus_minus, "--", LexToken::MinusMinus);
lex_token_test!(lex_minus_equals, "-=", LexToken::MinusEquals);
lex_token_test!(lex_star, "*", LexToken::Star);
lex_token_test!(lex_star_equals, "*=", LexToken::StarEquals);
lex_token_test!(lex_star_star, "**", LexToken::StarStar);
lex_token_test!(lex_star_star_equals, "**=", LexToken::StarStarEquals);
lex_token_test!(lex_slash, "/", LexToken::Slash);
lex_token_test!(lex_slash_equals, "/=", LexToken::SlashEquals);
lex_token_test!(lex_percent, "%", LexToken::Percent);
lex_token_test!(lex_percent_equals, "%=", LexToken::PercentEquals);
lex_token_test!(lex_exclamation_mark, "!", LexToken::ExclamationMark);
lex_token_test!(lex_not_equals, "!=", LexToken::NotEquals);
lex_token_test!(lex_not_is_keyword, "!is", LexToken::NotIsKeyword);
lex_token_test!(lex_vert_line, "|", LexToken::VerticalLine);
lex_token_test!(lex_vert_line_equals, "|=", LexToken::LineEquals);
lex_token_test!(lex_line_line, "||", LexToken::LineLine);
lex_token_test!(lex_ampersand, "&", LexToken::Ampersand);
lex_token_test!(lex_ampersand_equals, "&=", LexToken::AmpersandEquals);
lex_token_test!(lex_ampersand_ampersand, "&&", LexToken::AmpersandAmpersand);
lex_token_test!(lex_less_than, "<", LexToken::LessThan);
lex_token_test!(lex_less_than_equals, "<=", LexToken::LessThanEquals);
lex_token_test!(lex_left_left, "<<", LexToken::LeftLeft);
lex_token_test!(lex_left_left_equals, "<<=", LexToken::LeftLeftEquals);
lex_token_test!(lex_greater_than, ">", LexToken::GreaterThan);
lex_token_test!(lex_greater_than_equals, ">=", LexToken::GreaterThanEquals);
lex_token_test!(lex_right_right, ">>", LexToken::RightRight);
lex_token_test!(lex_right_right_equals, ">>=", LexToken::RightRightEquals);
lex_token_test!(lex_right_right_right, ">>>", LexToken::RightRightRight);
lex_token_test!(
    lex_right_right_right_equals,
    ">>>=",
    LexToken::RightRightRightEquals
);
lex_token_test!(lex_tilde, "~", LexToken::Tilde);
lex_token_test!(lex_at_symbol, "@", LexToken::AtSymbol);
// -- Keywords -----------------------------------------------------------------
lex_token_test!(lex_and_keyword, "and", LexToken::AndKeyword);
lex_token_test!(lex_abstract_keyword, "abstract", LexToken::AbstractKeyword);
lex_token_test!(lex_auto_keyword, "auto", LexToken::AutoKeyword);
lex_token_test!(lex_bool_keyword, "bool", LexToken::BoolKeyword);
lex_token_test!(lex_break_keyword, "break", LexToken::BreakKeyword);
lex_token_test!(lex_case_keyword, "case", LexToken::CaseKeyword);
lex_token_test!(lex_cast_keyword, "cast", LexToken::CastKeyword);
lex_token_test!(lex_catch_keyword, "catch", LexToken::CatchKeyword);
lex_token_test!(lex_class_keyword, "class", LexToken::ClassKeyword);
lex_token_test!(lex_const_keyword, "const", LexToken::ConstKeyword);
lex_token_test!(lex_continue_keyword, "continue", LexToken::ContinueKeyword);
lex_token_test!(lex_default_keyword, "default", LexToken::DefaultKeyword);
lex_token_test!(lex_do_keyword, "do", LexToken::DoKeyword);
lex_token_test!(lex_double_keyword, "double", LexToken::DoubleKeyword);
lex_token_test!(lex_else_keyword, "else", LexToken::ElseKeyword);
lex_token_test!(lex_enum_keyword, "enum", LexToken::EnumKeyword);
lex_token_test!(lex_explicit_keyword, "explicit", LexToken::ExplicitKeyword);
lex_token_test!(lex_external_keyword, "external", LexToken::ExternalKeyword);
lex_token_test!(lex_false_keyword, "false", LexToken::FalseKeyword);
lex_token_test!(lex_final_keyword, "final", LexToken::FinalKeyword);
lex_token_test!(lex_float_keyword, "float", LexToken::FloatKeyword);
lex_token_test!(lex_for_keyword, "for", LexToken::ForKeyword);
lex_token_test!(lex_from_keyword, "from", LexToken::FromKeyword);
lex_token_test!(lex_funcdef_keyword, "funcdef", LexToken::FuncDefKeyword);
lex_token_test!(lex_function_keyword, "function", LexToken::FunctionKeyword);
lex_token_test!(lex_get_keyword, "get", LexToken::GetKeyword);
lex_token_test!(lex_if_keyword, "if", LexToken::IfKeyword);
lex_token_test!(lex_import_keyword, "import", LexToken::ImportKeyword);
lex_token_test!(lex_in_keyword, "in", LexToken::InKeyword);
lex_token_test!(lex_inout_keyword, "inout", LexToken::InOutKeyword);
lex_token_test!(lex_int_keyword, "int", LexToken::IntKeyword);
lex_token_test!(
    lex_interface_keyword,
    "interface",
    LexToken::InterfaceKeyword
);
lex_token_test!(lex_int8_keyword, "int8", LexToken::Int8Keyword);
lex_token_test!(lex_int16_keyword, "int16", LexToken::Int16Keyword);
lex_token_test!(lex_int32_keyword, "int32", LexToken::Int32Keyword);
lex_token_test!(lex_int64_keyword, "int64", LexToken::Int64Keyword);
lex_token_test!(lex_is_keyword, "is", LexToken::IsKeyword);
lex_token_test!(lex_mixin_keyword, "mixin", LexToken::MixinKeyword);
lex_token_test!(
    lex_namespace_keyword,
    "namespace",
    LexToken::NamespaceKeyword
);
lex_token_test!(lex_not_keyword, "not", LexToken::NotKeyword);
lex_token_test!(lex_null_keyword, "null", LexToken::NullKeyword);
lex_token_test!(lex_or_keyword, "or", LexToken::OrKeyword);
lex_token_test!(lex_out_keyword, "out", LexToken::OutKeyword);
lex_token_test!(lex_override_keyword, "override", LexToken::OverrideKeyword);
lex_token_test!(lex_private_keyword, "private", LexToken::PrivateKeyword);
lex_token_test!(lex_property_keyword, "property", LexToken::PropertyKeyword);
lex_token_test!(
    lex_protected_keyword,
    "protected",
    LexToken::ProtectedKeyword
);
lex_token_test!(lex_return_keyword, "return", LexToken::ReturnKeyword);
lex_token_test!(lex_set_keyword, "set", LexToken::SetKeyword);
lex_token_test!(lex_shared_keyword, "shared", LexToken::SharedKeyword);
lex_token_test!(lex_super_keyword, "super", LexToken::SuperKeyword);
lex_token_test!(lex_switch_keyword, "switch", LexToken::SwitchKeyword);
lex_token_test!(lex_this_keyword, "this", LexToken::ThisKeyword);
lex_token_test!(lex_true_keyword, "true", LexToken::TrueKeyword);
lex_token_test!(lex_try_keyword, "try", LexToken::TryKeyword);
lex_token_test!(lex_typedef_keyword, "typedef", LexToken::TypeDefKeyword);
lex_token_test!(lex_uint_keyword, "uint", LexToken::UintKeyword);
lex_token_test!(lex_uint8_keyword, "uint8", LexToken::Uint8Keyword);
lex_token_test!(lex_uint16_keyword, "uint16", LexToken::Uint16Keyword);
lex_token_test!(lex_uint32_keyword, "uint32", LexToken::Uint32Keyword);
// NOTE(review): no test for `uint64` although the lexer maps it to Uint64Keyword.
lex_token_test!(lex_void_keyword, "void", LexToken::VoidKeyword);
lex_token_test!(lex_while_keyword, "while", LexToken::WhileKeyword);
lex_token_test!(lex_xor_keyword, "xor", LexToken::XorKeyword);
// -- Identifiers --------------------------------------------------------------
lex_identifier_test!(lex_basic_identifier_foo, "foo");
lex_identifier_test!(lex_basic_identifier_foobar, "foobar");
// -- Integer literals (decimal, hex, octal, binary, underscores) ---------------
lex_integer_test!(lex_zero, "0", 0);
lex_integer_test!(lex_one_two_three_four, "1234", 1234);
lex_integer_test!(lex_specific_one_two_three_four, "0d1234", 1234);
lex_integer_test!(lex_decimal_with_underline, "123_456", 123456);
lex_integer_test!(lex_specific_decimal_with_underline, "0D123_456", 123456);
lex_integer_test!(lex_hexadecimal_0f, "0X0F", 15);
lex_integer_test!(lex_hexadecimal_ff, "0xff", 255);
lex_integer_test!(lex_hexadecimal_ff_ff, "0xff_ff", 65535);
lex_integer_test!(lex_octal_112, "0o112", 74);
lex_integer_test!(lex_binary_1110, "0b1110", 14);
lex_integer_test!(lex_binary_01110, "0b01110", 14);
// -- Float literals -----------------------------------------------------------
lex_float_test!(lex_zero_float, "0.0", 0.0);
lex_float_test!(lex_half, "0.5", 0.5);
lex_float_test!(lex_point_0_5, "0.05", 0.05);
lex_float_test!(lex_half_with_exponent, "0.5e10", 0.5e10);
// -- String literals (escapes and heredocs) -----------------------------------
lex_string_test!(lex_simple_string, "\"foo\"", "foo");
lex_string_test!(lex_simple_string_single_quote, "\'foo\'", "foo");
lex_string_test!(lex_string_with_escape, "\"fo\\\"o\"", "fo\"o");
lex_string_test!(lex_string_with_new_line, "\"fo\\no\"", "fo\no");
lex_string_test!(lex_heredoc_string, "\"\"\"foo\"\"\"", "foo");
lex_string_test!(lex_heredoc_string_with_quote, "\"\"\"fo\"o\"\"\"", "fo\"o");
// Two identifiers separated by a space lex to identifier/whitespace/identifier.
#[test]
fn lex_two_identifier() {
    let tokens = lex("foo bar");
    assert_eq!(tokens.len(), 4);
    assert_eq!(tokens[0], LexToken::Identifier("foo".to_string()));
    assert_eq!(tokens[1], LexToken::WhiteSpace);
    assert_eq!(tokens[2], LexToken::Identifier("bar".to_string()));
    assert_eq!(tokens[3], LexToken::EndOfFile);
}
// `!is` must lex as a single NotIsKeyword token even between identifiers.
#[test]
fn lex_multiple_tokens_with_not_is() {
    let tokens = lex("a !is b");
    assert_eq!(tokens.len(), 6);
    assert_eq!(tokens[0], LexToken::Identifier("a".to_string()));
    assert_eq!(tokens[1], LexToken::WhiteSpace);
    assert_eq!(tokens[2], LexToken::NotIsKeyword);
    assert_eq!(tokens[3], LexToken::WhiteSpace);
    assert_eq!(tokens[4], LexToken::Identifier("b".to_string()));
    assert_eq!(tokens[5], LexToken::EndOfFile);
}

View File

@ -1,6 +1,2 @@
mod lex_numerical;
pub mod lex_tokens;
pub mod lexer;
pub mod parser;
#[cfg(test)]
mod lexer_tests;

89
src/parsing/parser/mod.rs Normal file
View File

@ -0,0 +1,89 @@
pub mod parsed_statement;
#[cfg(test)]
mod parser_tests;
use super::lexer::lex_tokens::TokenType;
use crate::parsing::parser::parsed_statement::ParsedStatement;
use itertools::{Itertools, MultiPeek};
/// Token cursor used by the parser; transparently skips whitespace tokens and
/// yields `EndOfFile` indefinitely once the underlying token list is exhausted.
struct ParseReader<'a> {
    tokens: MultiPeek<core::slice::Iter<'a, TokenType>>,
}
impl<'a> ParseReader<'a> {
    /// Peeks the next non-whitespace token without consuming it. Each call
    /// advances the MultiPeek cursor, so successive peeks return successive
    /// tokens until `reset_peek` (or a `next`) rewinds the cursor.
    pub fn peek(&mut self) -> &TokenType {
        let t = self.tokens.peek();
        match t {
            None => &TokenType::EndOfFile,
            // Recurse to skip over whitespace in the lookahead.
            Some(TokenType::WhiteSpace) => self.peek(),
            Some(v) => v,
        }
    }
    /// Consumes and returns the next non-whitespace token.
    pub fn next(&mut self) -> &TokenType {
        let t = self.tokens.next();
        match t {
            None => &TokenType::EndOfFile,
            // Recurse to skip over whitespace.
            Some(TokenType::WhiteSpace) => self.next(),
            Some(v) => v,
        }
    }
    /// Consumes the next token, asserting it equals the expected `token`.
    /// Currently panics on mismatch via `unimplemented!`.
    pub fn consume(&mut self, token: TokenType) -> &TokenType {
        let n = self.next();
        if n != &token {
            // TODO: log error
            unimplemented!()
        }
        n
    }
    /// Rewinds the peek cursor back to the last consumed position.
    #[inline(always)]
    pub fn reset_peek(&mut self) {
        self.tokens.reset_peek();
    }
}
/// Entry point of the parser: turns a lexed token list into a parse tree
/// rooted at a `ParsedStatement::Script`.
pub fn parse(tokens: Vec<TokenType>) -> Box<ParsedStatement> {
    // Wrap the token slice in a peekable, whitespace-skipping cursor.
    let token_cursor = tokens.iter().multipeek();
    let mut reader = ParseReader {
        tokens: token_cursor,
    };
    parse_script(&mut reader)
}
/// Parses a sequence of top-level statements, collecting them into a
/// `ParsedStatement::Script`. Stops when `EndOfFile` is peeked.
fn parse_script(reader: &mut ParseReader) -> Box<ParsedStatement> {
    let mut vec: Vec<Box<ParsedStatement>> = Vec::new();
    loop {
        let n = reader.peek();
        match n {
            TokenType::NamespaceKeyword => {
                vec.push(parse_namespace(reader));
            }
            TokenType::EndOfFile => break,
            _ => {
                // Log error?
                // NOTE(review): unexpected tokens are never consumed here; the
                // loop only terminates because each `peek` advances the
                // MultiPeek cursor past them -- confirm this is intended.
            }
        }
    }
    Box::new(ParsedStatement::Script(vec))
}
/// Parses `namespace <identifier> { <script> }`, returning the corresponding
/// `ParsedStatement::Namespace` node. The reader must be positioned so that
/// the next token is the `namespace` keyword.
fn parse_namespace(reader: &mut ParseReader) -> Box<ParsedStatement> {
    // Consume the `namespace` keyword itself.
    reader.next();
    // The namespace name must follow as a plain identifier.
    let name = match reader.next() {
        TokenType::Identifier(i) => i.to_string(),
        _ => {
            // Log error
            unimplemented!();
        }
    };
    // Body: a brace-delimited script.
    reader.consume(TokenType::OpenCurlyBracket);
    let body = parse_script(reader);
    reader.consume(TokenType::CloseCurlyBracket);
    Box::new(ParsedStatement::Namespace(name, body))
}

View File

@ -0,0 +1,5 @@
/// A node in the parse tree produced by the parser.
///
/// Derives `Debug` and `PartialEq` so parse trees can be printed in
/// diagnostics and compared directly in tests (backward-compatible addition).
#[derive(Debug, PartialEq)]
pub enum ParsedStatement {
    /// A sequence of statements: a whole file or the body of a namespace.
    Script(Vec<Box<ParsedStatement>>),
    /// A named namespace and the script it encloses: `namespace foo { ... }`.
    Namespace(String, Box<ParsedStatement>),
}

View File

@ -0,0 +1,31 @@
use super::parse;
use super::parsed_statement::ParsedStatement;
use crate::parsing::lexer::lex_tokens::TokenType;
// `namespace foo { }` should parse to Script([Namespace("foo", Script([]))]).
#[test]
fn test_empty_namespace() {
    let script = parse(vec![
        TokenType::NamespaceKeyword,
        TokenType::WhiteSpace,
        TokenType::Identifier("foo".to_string()),
        TokenType::WhiteSpace,
        TokenType::OpenCurlyBracket,
        TokenType::CloseCurlyBracket,
        TokenType::EndOfFile,
    ]);
    // The root must be a script containing exactly the namespace statement.
    if let ParsedStatement::Script(inner) = script.as_ref() {
        assert_eq!(1, inner.len());
        if let ParsedStatement::Namespace(identifier, inner_script) = inner[0].as_ref() {
            assert_eq!(identifier, "foo");
            // The namespace body is empty, so its script has no statements.
            if let ParsedStatement::Script(inner) = inner_script.as_ref() {
                assert_eq!(0, inner.len());
            } else {
                unreachable!();
            }
        } else {
            unreachable!()
        }
    } else {
        unreachable!();
    }
}

10
src/span.rs Normal file
View File

@ -0,0 +1,10 @@
/// A region of the source input, from offset `start` up to `end` (the lexer
/// emits single-character tokens as `start..start + 1`, i.e. `end` exclusive).
///
/// Spans are two-word value types, so deriving `Copy` (plus `Debug`, equality
/// and `Hash`) is free and lets tokens and logs share spans without moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset of the first character covered by the span.
    pub start: usize,
    /// Offset one past the last character covered by the span.
    pub end: usize,
}
impl Span {
    /// Creates a span covering `start..end`.
    pub fn new(start: usize, end: usize) -> Span {
        Span { start, end }
    }
}