Split tokenizer into its own file
This commit is contained in:
@ -1,5 +1,9 @@
pub mod tokenizer;
use std::{env, fs};
use std::{env, fs};
use crate::tokenizer::{Token, tokenize};
fn main()
fn main()
let args: Vec<String> = env::args().collect();
let args: Vec<String> = env::args().collect();
@ -21,1189 +25,3 @@ fn compile(file_content: &String) -> Result<(), &'static str>
println!("{:?}", tokens);
println!("{:?}", tokens);
return Ok(());
return Ok(());
/// Tokens emitted by the tokenizer: reserved words, operators and punctuation.
///
/// NOTE(review): `Name`, `IntLiteral`, `HexLiteral` and `StringLiteral` are constructed with a
/// `String` payload in `tokenize_char`, but their declarations are not visible in this
/// listing — confirm against the full source.
#[derive(Debug, Clone)]
enum Token
// Reserved words.
And, Break, Do, Else, Elseif, End,
False, For, Function, Goto, If, In,
Local, Nil, Not, Or, Repeat, Return,
Then, True, Until, While,
// Operators.
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
// Brackets and punctuation.
RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
/// States of the character-driven state machine implemented by `tokenize_char`.
///
/// Variants spelled like keyword prefixes (`A`, `An`, `And`, ...) are the intermediate states
/// that `tokenize_char` steps through while matching a reserved word one letter at a time.
///
/// NOTE(review): `Start`, `Functio` and `Function` are used by `tokenize_char` but are not
/// visible in this listing — confirm against the full source.
#[derive(Debug, Clone, Copy, PartialEq)]
enum TokenizerState
// String, identifier and number states.
Quote, SingleQuote, Name, Number, Zero,
// One-letter keyword prefixes.
A, B, D, E, F, G, I, L, N, O, R, T, U, W,
// Single-character operators and punctuation.
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
Colon, Semicolon, Comma, Dot,
// Two-letter keyword prefixes and two-character operators.
An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
// Longer keyword prefixes, interleaved with the remaining literal and comment states.
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
BigCommentLongBracketStart, SmallComment,
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
BigComment, BigCommentLongBracketEnd,
Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
Elseif, Functi, Repeat, Return,
/// Records `index` as the last accepted position and moves the state machine to `new_state`.
fn tokenize_update_index_and_state(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState)
// `last_index` is an `i32` because the rest of the tokenizer uses -1 as "no position recorded".
*last_index = index as i32;
*state = new_state;
/// Accepts the current character: updates `last_index`/`state` and replaces the pending
/// `token` with `new_token`. This helper takes no text buffer, so it is used where the token
/// text does not need to be accumulated.
fn tokenize_terminal_no_str(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState)
tokenize_update_index_and_state(last_index, index, state, new_state);
*token = new_token;
/// Accepts the current character by updating `last_index`/`state` while leaving the pending
/// token untouched.
///
/// NOTE(review): `token_str` and `ch` are unused in the visible body; presumably the character
/// is meant to be appended to `token_str` — confirm against the full source.
fn tokenize_terminal_no_token(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState, token_str: &mut String, ch: char)
tokenize_update_index_and_state(last_index, index, state, new_state);
/// Accepts the current character and sets the pending token by delegating to
/// `tokenize_terminal_no_str`.
///
/// NOTE(review): `token_str` and `ch` are unused in the visible body; presumably the character
/// is meant to be appended to `token_str` — confirm against the full source.
fn tokenize_terminal(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState, token_str: &mut String, ch: char)
tokenize_terminal_no_str(last_index, index, token, state, new_token, new_state);
fn tokenize_backtrack(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
return tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, token.clone().unwrap());
/// Ends the token in progress and rewinds the scan to the last accepted position.
///
/// Fails with `Err("Lexerr")` (after printing the current indices, pending token and token
/// list) when no position was recorded (`last_index == -1`) or no token is pending. Otherwise
/// `index` is reset to `last_index` and the scanner state is cleared back to `Start`.
///
/// NOTE(review): the visible body never pushes onto `tokens` or touches `token_str`;
/// presumably those statements are missing from this listing — confirm against the full source.
fn tokenize_backtrack_name(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
if *last_index == -1 || token.is_none()
println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
return Err("Lexerr");
// Resume scanning from the last accepted character.
*index = *last_index as usize;
*last_index = -1;
*token = None;
*state = TokenizerState::Start;
return Ok(());
/// Ends the token in progress on behalf of a caller-supplied `new_token` and rewinds the scan
/// to the last accepted position.
///
/// Fails with `Err("Lexerr")` (after printing the current indices, pending token and token
/// list) when no position was recorded (`last_index == -1`) or no token is pending. Otherwise
/// `index` is reset to `last_index` and the scanner state is cleared back to `Start`.
///
/// NOTE(review): `new_token` is unused in the visible body and nothing is pushed onto
/// `tokens`; presumably that statement is missing from this listing — confirm against the
/// full source.
fn tokenize_backtrack_custom_token(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, new_token: Token) -> Result<(), &'static str>
if *last_index == -1 || token.is_none()
println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
return Err("Lexerr");
// Resume scanning from the last accepted character.
*index = *last_index as usize;
*last_index = -1;
*token = None;
*state = TokenizerState::Start;
return Ok(());
/// Handles a character that follows the start of an identifier-like token.
///
/// An ASCII letter, digit or `_` moves the state machine to `Name`; the token is ended through
/// `tokenize_backtrack_name`, whose error is propagated.
///
/// NOTE(review): no `else` is visible between the two calls; presumably the backtrack only
/// runs when the character is not an identifier character — confirm against the full source.
fn tokenize_alphanumeric_nonstart(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char) -> Result<(), &'static str>
if ch.is_ascii_alphanumeric() || ch == '_'
tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
tokenize_backtrack_name(last_index, index, tokens, token, token_str, state)?;
return Ok(());
/// Handles a character that follows a fully matched keyword.
///
/// An ASCII letter, digit or `_` moves the state machine to `Name`; the token is ended through
/// `tokenize_backtrack_custom_token` with `new_token`, whose error is propagated.
///
/// NOTE(review): no `else` is visible between the two calls; presumably the backtrack only
/// runs when the character is not an identifier character — confirm against the full source.
fn tokenize_alphanumeric_nonstart_custom(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char, new_token: Token) -> Result<(), &'static str>
if ch.is_ascii_alphanumeric() || ch == '_'
tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, new_token)?;
return Ok(());
/// Feeds one character `ch` into the tokenizer state machine.
///
/// Depending on `state`, the character either extends the token being scanned (updating
/// `last_index`, `token` and `state` through the `tokenize_terminal*` helpers) or ends it, in
/// which case one of the `tokenize_backtrack*` helpers is invoked.
/// `long_bracket_level` is incremented for every `=` of an opening long bracket of a big
/// comment and decremented for every `=` of the closing one.
///
/// Returns `Err` with a static message for unknown escape sequences, malformed long brackets
/// and failed backtracking. States or characters without a handler reach `todo!` and panic.
fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, index: &mut usize, token: &mut Option<Token>, token_str: &mut String, tokens: &mut Vec<Token>, long_bracket_level: &mut u32) -> Result<(), &'static str>
match state
// No token in progress: the first character selects the next state.
TokenizerState::Start =>
match ch
'-' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Minus), TokenizerState::Minus),
'a' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("a".to_string())), TokenizerState::A, token_str, ch),
'b' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("b".to_string())), TokenizerState::B, token_str, ch),
'd' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("d".to_string())), TokenizerState::D, token_str, ch),
'e' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("e".to_string())), TokenizerState::E, token_str, ch),
'f' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("f".to_string())), TokenizerState::F, token_str, ch),
'i' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("i".to_string())), TokenizerState::I, token_str, ch),
'g' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("g".to_string())), TokenizerState::G, token_str, ch),
'l' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("l".to_string())), TokenizerState::L, token_str, ch),
'n' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("n".to_string())), TokenizerState::N, token_str, ch),
'o' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("o".to_string())), TokenizerState::O, token_str, ch),
'r' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("r".to_string())), TokenizerState::R, token_str, ch),
't' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("t".to_string())), TokenizerState::T, token_str, ch),
'u' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("u".to_string())), TokenizerState::U, token_str, ch),
'w' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("w".to_string())), TokenizerState::W, token_str, ch),
',' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Comma), TokenizerState::Comma),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Equals), TokenizerState::Equals),
'(' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundOpen), TokenizerState::RoundOpen),
')' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundClosed), TokenizerState::RoundClosed),
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Dot), TokenizerState::Dot),
':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Colon), TokenizerState::Colon),
'{' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyOpen), TokenizerState::CurlyOpen),
'}' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyClosed), TokenizerState::CurlyClosed),
'[' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareOpen), TokenizerState::SquareOpen),
']' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareClosed), TokenizerState::SquareClosed),
'+' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Plus), TokenizerState::Plus),
'~' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Tilde), TokenizerState::Tilde),
'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Gt), TokenizerState::Gt),
'<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Lt), TokenizerState::Lt),
'#' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Hash), TokenizerState::Hash),
'|' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Pipe), TokenizerState::Pipe),
'&' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Ampersand), TokenizerState::Ampersand),
'%' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Percent), TokenizerState::Percent),
'*' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Star), TokenizerState::Star),
'/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash),
';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon),
'^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret),
'0' => tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch),
'"' =>
*token = None;
*state = TokenizerState::Quote;
'\'' =>
*token = None;
*state = TokenizerState::SingleQuote;
_ =>
if ch.is_whitespace() { }
else if ch.is_ascii_alphabetic() || ch == '_'
tokenize_terminal(last_index, *index, token, state, Some(Token::Name(token_str.clone())), TokenizerState::Name, token_str, ch);
else if ch.is_numeric() && ch.is_ascii()
tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch);
todo!("State {:?}, Char {}", state, ch);
// Double-quoted string literal and its escape handling.
TokenizerState::Quote =>
match ch
'\\' =>
*state = TokenizerState::QuoteBackslash;
'"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String),
_ =>
TokenizerState::QuoteBackslash =>
match ch
'a' =>
*state = TokenizerState::Quote;
'b' =>
*state = TokenizerState::Quote;
't' =>
*state = TokenizerState::Quote;
'n' | '\n' =>
*state = TokenizerState::Quote;
'v' =>
*state = TokenizerState::Quote;
'f' =>
*state = TokenizerState::Quote;
'r' =>
*state = TokenizerState::Quote;
'\\' =>
*state = TokenizerState::Quote;
'"' =>
*state = TokenizerState::Quote;
'\'' =>
*state = TokenizerState::Quote;
'z' =>
*state = TokenizerState::QuoteBackslashZ;
_ => return Err("Unknown escape sequence"),
TokenizerState::QuoteBackslashZ =>
match ch
'\\' =>
*state = TokenizerState::QuoteBackslash;
'"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String),
_ =>
if !ch.is_whitespace()
*state = TokenizerState::Quote;
// Single-quoted string literal; mirrors the double-quote states above.
TokenizerState::SingleQuote =>
match ch
'\\' =>
*state = TokenizerState::SingleQuoteBackslash;
'\'' =>
*last_index = *index as i32;
*token = Some(Token::StringLiteral(token_str.clone()));
*state = TokenizerState::String;
_ =>
TokenizerState::SingleQuoteBackslash =>
match ch
'a' =>
*state = TokenizerState::SingleQuote;
'b' =>
*state = TokenizerState::SingleQuote;
't' =>
*state = TokenizerState::SingleQuote;
'n' | '\n' =>
*state = TokenizerState::SingleQuote;
'v' =>
*state = TokenizerState::SingleQuote;
'f' =>
*state = TokenizerState::SingleQuote;
'r' =>
*state = TokenizerState::SingleQuote;
'\\' =>
*state = TokenizerState::SingleQuote;
'"' =>
*state = TokenizerState::SingleQuote;
'\'' =>
*state = TokenizerState::SingleQuote;
'z' =>
*state = TokenizerState::SingleQuoteBackslashZ;
_ => return Err("Unknown escape sequence"),
TokenizerState::SingleQuoteBackslashZ =>
match ch
'\\' =>
*state = TokenizerState::SingleQuoteBackslash;
'\'' =>
*last_index = *index as i32;
*token = Some(Token::StringLiteral(token_str.clone()));
*state = TokenizerState::String;
_ =>
if !ch.is_whitespace()
*state = TokenizerState::SingleQuote;
TokenizerState::String =>
let content = token_str.clone();
tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, Token::StringLiteral(content))?;
TokenizerState::Name => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
// Numeric literals: decimal, and hexadecimal after a leading `0x`.
TokenizerState::Zero =>
match ch
'x' =>
*token = None;
*state = TokenizerState::HexNumberX;
_ =>
if ch.is_numeric() && ch.is_ascii()
*last_index = *index as i32;
*token = Some(Token::IntLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
TokenizerState::HexNumberX =>
if ch.is_ascii() && ch.is_numeric() || match ch
'A'..='F' | 'a'..='f' => true,
_ => false,
*last_index = *index as i32;
*token = Some(Token::HexLiteral(token_str.clone()));
*state = TokenizerState::HexNumber;
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
TokenizerState::HexNumber =>
match ch
'p' =>
*token = None;
*state = TokenizerState::HexExpNumber;
_ =>
if ch.is_ascii() && ch.is_numeric() || match ch
'A'..='F' | 'a'..='f' => true,
_ => false,
*last_index = *index as i32;
*token = Some(Token::HexLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
TokenizerState::Number =>
match ch
'e' =>
*token = None;
*state = TokenizerState::ExpNumber;
_ =>
if ch.is_numeric() && ch.is_ascii()
*last_index = *index as i32;
*token = Some(Token::IntLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
// Tokens that no further character can extend: end them immediately.
TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed |
TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus |
TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash |
TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen |
TokenizerState::SquareClosed | TokenizerState::Pipe | TokenizerState::Ampersand |
TokenizerState::Percent | TokenizerState::Star | TokenizerState::Semicolon |
TokenizerState::Caret | TokenizerState::DotDotDot | TokenizerState::GtGt |
TokenizerState::LtLt | TokenizerState::SlashSlash => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
// Operators that may grow into a longer token with the next character.
TokenizerState::Tilde =>
match ch
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::TildeEquals), TokenizerState::TildeEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Gt =>
match ch
'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtGt), TokenizerState::GtGt),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtEquals), TokenizerState::GtEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Lt =>
match ch
// A second `<` extends `Lt` to `LtLt`, mirroring how `>` extends `Gt` to `GtGt`.
'<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtLt), TokenizerState::LtLt),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtEquals), TokenizerState::LtEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Slash =>
match ch
'/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SlashSlash), TokenizerState::SlashSlash),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Dot =>
match ch
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDot), TokenizerState::DotDot),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::DotDot =>
match ch
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDotDot), TokenizerState::DotDotDot),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Colon =>
match ch
':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::ColonColon), TokenizerState::ColonColon),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Equals =>
match ch
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::EqualsEquals), TokenizerState::EqualsEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
// A second `-` drops the pending `Minus` token and enters the comment states.
TokenizerState::Minus =>
match ch
'-' => tokenize_terminal_no_str(last_index, *index, token, state, None, TokenizerState::SmallCommentStart),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::SmallCommentStart =>
match ch
'[' =>
*token = None;
*state = TokenizerState::BigCommentLongBracketStart;
'\n' =>
*state = TokenizerState::Start;
*last_index = -1;
_ =>
*state = TokenizerState::SmallComment;
TokenizerState::SmallComment =>
match ch
'\n' =>
*state = TokenizerState::Start;
*last_index = -1;
_ => { }
TokenizerState::BigCommentLongBracketStart =>
match ch
'=' =>
*long_bracket_level += 1;
'[' =>
*state = TokenizerState::BigComment;
_ => return Err("Malformed long bracket at the beginning of a big comment"),
TokenizerState::BigComment =>
match ch
']' =>
*state = TokenizerState::BigCommentLongBracketEnd;
_ => { }
// NOTE(review): any character other than `=` or `]` after a `]` is reported as an error
// here, so a lone `]` inside a big comment is rejected — confirm this is intended.
TokenizerState::BigCommentLongBracketEnd =>
match ch
'=' =>
if *long_bracket_level == 0
return Err("Long bracket level too big when ending big comment");
*long_bracket_level -= 1;
']' =>
if *long_bracket_level != 0
return Err("Long bracket level too small when ending big comment");
*state = TokenizerState::Start;
_ => return Err("Malformed long bracket when ending big comment"),
// Keyword recognition: one state per keyword prefix. A letter that continues the keyword
// advances to the next prefix state; anything else is handed to
// `tokenize_alphanumeric_nonstart`, which treats the text as a plain name.
TokenizerState::A =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::An, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::An =>
match ch
'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::And, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::And => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::And)?,
TokenizerState::W =>
match ch
'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Wh, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Wh =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whi, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Whi =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whil, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Whil =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::While, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::While => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::While)?,
TokenizerState::B =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Br, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Br =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Bre, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Bre =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Brea, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Brea =>
match ch
'k' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Break, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Break => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Break)?,
TokenizerState::G =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Go, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Go =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Got, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Got =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Goto, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Goto => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Goto)?,
TokenizerState::R =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Re, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Re =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ret, token_str, ch),
'p' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Rep, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Ret =>
match ch
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retu, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Retu =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retur, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Retur =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Return, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Return => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Return)?,
TokenizerState::Rep =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repe, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repe =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repea, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repea =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repeat, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repeat => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Repeat)?,
TokenizerState::N =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ni, token_str, ch),
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::No, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::No =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Not, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Not => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Not)?,
TokenizerState::Ni =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Nil, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Nil => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Nil)?,
TokenizerState::T =>
match ch
'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Th, token_str, ch),
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tr, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Th =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::The, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::The =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Then, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Then => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Then)?,
TokenizerState::Tr =>
match ch
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tru, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Tru =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::True, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::True => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::True)?,
TokenizerState::E =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::El, token_str, ch),
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::En, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::En =>
match ch
'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::End, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::End => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::End)?,
TokenizerState::El =>
match ch
's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Els, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Els =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Else, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Else =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elsei, token_str, ch),
_ => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Else)?,
TokenizerState::Elsei =>
match ch
'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elseif, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Elseif => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Elseif)?,
TokenizerState::O =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Or, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Or => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Or)?,
TokenizerState::D =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Do, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Do => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Do)?,
TokenizerState::I =>
match ch
'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::If, token_str, ch),
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::In, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::In => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::In)?,
TokenizerState::If => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::If)?,
TokenizerState::F =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fa, token_str, ch),
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fo, token_str, ch),
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fu, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fu =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fun, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fun =>
match ch
'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Func, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Func =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Funct, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Funct =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functi, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Functi =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functio, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Functio =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Function, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Function => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Function)?,
TokenizerState::Fa =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fal, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fal =>
match ch
's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fals, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fals =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::False, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::False => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::False)?,
TokenizerState::Fo =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::For, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::For => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::For)?,
TokenizerState::L =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Lo, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Lo =>
match ch
'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loc, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Loc =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loca, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Loca =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Local, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Local => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Local)?,
TokenizerState::U =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Un, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Un =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unt, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Unt =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unti, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Unti =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Until, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Until => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Until)?,
// States without a handler yet.
_ => todo!("State: {:?}", state),
return Ok(());
fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
let mut tokens: Vec<Token> = Vec::new();
let mut state = TokenizerState::Start;
let char_vec: Vec<char> = file_content.chars().collect();
let mut last_index: i32 = -1;
let mut index = 0;
let mut token: Option<Token> = None;
let mut token_str: String = String::new();
let mut long_bracket_level = 0;
while index < char_vec.len()
let ch = char_vec[index];
tokenize_char(&mut state, ch, &mut last_index, &mut index, &mut token, &mut token_str, &mut tokens, &mut long_bracket_level)?;
index += 1;
match state
TokenizerState::Name => tokenize_backtrack_name(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state)?,
TokenizerState::End => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::End)?,
TokenizerState::And => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::And)?,
TokenizerState::Semicolon => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::Semicolon)?,
_ => todo!("state: {:?}", state),
return Ok(tokens);
Normal file
Normal file
@ -0,0 +1,1186 @@
/// A lexical token produced by the tokenizer.
///
/// Keyword, operator and punctuation variants carry no data; the first four
/// variants own the text collected for the token.
#[derive(Debug, Clone)]
pub enum Token
{
    // Tokens carrying text; the tokenizer constructs all four of these.
    Name(String), IntLiteral(String), HexLiteral(String), StringLiteral(String),
    // Keywords.
    And, Break, Do, Else, Elseif, End,
    False, For, Function, Goto, If, In,
    Local, Nil, Not, Or, Repeat, Return,
    Then, True, Until, While,
    // Operators.
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
    EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
    // Brackets and separators.
    RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
    Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
}
/// States of the hand-written tokenizer state machine.
///
/// Apart from `Start`, the variants are grouped by how many characters of the
/// current token have been consumed; the keyword states are named after the
/// keyword prefix matched so far (`F`, `Fu`, `Fun`, ... `Function`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenizerState
{
    // Initial state; the tokenizer returns here after every emitted token.
    Start,
    Quote, SingleQuote, Name, Number, Zero,
    A, B, D, E, F, G, I, L, N, O, R, T, U, W,
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
    Colon, Semicolon, Comma, Dot,
    An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
    LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
    SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
    And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
    DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
    BigCommentLongBracketStart, SmallComment,
    Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
    BigComment, BigCommentLongBracketEnd,
    Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
    Elseif, Functi, Repeat, Return,
    // Last two steps of the `function` keyword.
    Functio,
    Function,
}
fn tokenize_update_index_and_state(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState)
*last_index = index as i32;
*state = new_state;
fn tokenize_terminal_no_str(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState)
tokenize_update_index_and_state(last_index, index, state, new_state);
*token = new_token;
fn tokenize_terminal_no_token(last_index: &mut i32, index: usize, state: &mut TokenizerState, new_state: TokenizerState, token_str: &mut String, ch: char)
tokenize_update_index_and_state(last_index, index, state, new_state);
fn tokenize_terminal(last_index: &mut i32, index: usize, token: &mut Option<Token>, state: &mut TokenizerState, new_token: Option<Token>, new_state: TokenizerState, token_str: &mut String, ch: char)
tokenize_terminal_no_str(last_index, index, token, state, new_token, new_state);
fn tokenize_backtrack(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
return tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, token.clone().unwrap());
fn tokenize_backtrack_name(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState) -> Result<(), &'static str>
if *last_index == -1 || token.is_none()
println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
return Err("Lexerr");
*index = *last_index as usize;
*last_index = -1;
*token = None;
*state = TokenizerState::Start;
return Ok(());
fn tokenize_backtrack_custom_token(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, new_token: Token) -> Result<(), &'static str>
if *last_index == -1 || token.is_none()
println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
return Err("Lexerr");
*index = *last_index as usize;
*last_index = -1;
*token = None;
*state = TokenizerState::Start;
return Ok(());
fn tokenize_alphanumeric_nonstart(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char) -> Result<(), &'static str>
if ch.is_ascii_alphanumeric() || ch == '_'
tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
tokenize_backtrack_name(last_index, index, tokens, token, token_str, state)?;
return Ok(());
fn tokenize_alphanumeric_nonstart_custom(last_index: &mut i32, index: &mut usize, tokens: &mut Vec<Token>, token: &mut Option<Token>, token_str: &mut String, state: &mut TokenizerState, ch: char, new_token: Token) -> Result<(), &'static str>
if ch.is_ascii_alphanumeric() || ch == '_'
tokenize_update_index_and_state(last_index, *index, state, TokenizerState::Name);
tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, new_token)?;
return Ok(());
/// Advances the tokenizer state machine by one character.
///
/// `state` selects the arm and `ch` is the character at `*index`. An arm either
/// moves to a new state (recording `*index` in `last_index` through the
/// `tokenize_terminal*` helpers when the input so far forms a complete token)
/// or hands over to one of the backtrack helpers, which rewind `*index`.
/// `long_bracket_level` counts the `=` signs seen in a `--[=[` comment opener.
///
/// # Errors
/// Returns `Err` for unknown string escapes and malformed long brackets, and
/// propagates whatever the backtrack helpers report.
///
/// # Panics
/// States that have no arm yet reach the `todo!` placeholder at the end.
fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, index: &mut usize, token: &mut Option<Token>, token_str: &mut String, tokens: &mut Vec<Token>, long_bracket_level: &mut u32) -> Result<(), &'static str>
match state
// Start: dispatch on the first character of a token.
TokenizerState::Start =>
match ch
'-' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Minus), TokenizerState::Minus),
// Letters that can begin a keyword enter the matching one-letter state; the
// pending token is a one-letter Name until a keyword is completed.
'a' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("a".to_string())), TokenizerState::A, token_str, ch),
'b' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("b".to_string())), TokenizerState::B, token_str, ch),
'd' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("d".to_string())), TokenizerState::D, token_str, ch),
'e' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("e".to_string())), TokenizerState::E, token_str, ch),
'f' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("f".to_string())), TokenizerState::F, token_str, ch),
'i' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("i".to_string())), TokenizerState::I, token_str, ch),
'g' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("g".to_string())), TokenizerState::G, token_str, ch),
'l' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("l".to_string())), TokenizerState::L, token_str, ch),
'n' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("n".to_string())), TokenizerState::N, token_str, ch),
'o' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("o".to_string())), TokenizerState::O, token_str, ch),
'r' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("r".to_string())), TokenizerState::R, token_str, ch),
't' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("t".to_string())), TokenizerState::T, token_str, ch),
'u' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("u".to_string())), TokenizerState::U, token_str, ch),
'w' => tokenize_terminal(last_index, *index, token, state, Some(Token::Name("w".to_string())), TokenizerState::W, token_str, ch),
// Operators and punctuation: the one-character token is already complete.
',' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Comma), TokenizerState::Comma),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Equals), TokenizerState::Equals),
'(' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundOpen), TokenizerState::RoundOpen),
')' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::RoundClosed), TokenizerState::RoundClosed),
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Dot), TokenizerState::Dot),
':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Colon), TokenizerState::Colon),
'{' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyOpen), TokenizerState::CurlyOpen),
'}' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::CurlyClosed), TokenizerState::CurlyClosed),
'[' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareOpen), TokenizerState::SquareOpen),
']' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SquareClosed), TokenizerState::SquareClosed),
'+' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Plus), TokenizerState::Plus),
'~' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Tilde), TokenizerState::Tilde),
'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Gt), TokenizerState::Gt),
'<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Lt), TokenizerState::Lt),
'#' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Hash), TokenizerState::Hash),
'|' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Pipe), TokenizerState::Pipe),
'&' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Ampersand), TokenizerState::Ampersand),
'%' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Percent), TokenizerState::Percent),
'*' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Star), TokenizerState::Star),
'/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash),
';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon),
'^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret),
'0' => tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch),
// An opening quote is not a complete token, so nothing is pending yet.
'"' =>
*token = None;
*state = TokenizerState::Quote;
'\'' =>
*token = None;
*state = TokenizerState::SingleQuote;
_ =>
// Whitespace between tokens is skipped.
if ch.is_whitespace() { }
else if ch.is_ascii_alphabetic() || ch == '_'
tokenize_terminal(last_index, *index, token, state, Some(Token::Name(token_str.clone())), TokenizerState::Name, token_str, ch);
else if ch.is_numeric() && ch.is_ascii()
tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch);
// NOTE(review): presumably the final `else` of the chain above - confirm.
todo!("State {:?}, Char {}", state, ch);
// Double-quoted string body.
TokenizerState::Quote =>
match ch
'\\' =>
*state = TokenizerState::QuoteBackslash;
'"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String),
// NOTE(review): this arm has no body here; presumably it appends `ch` to `token_str` - confirm.
_ =>
// Escape sequence inside a double-quoted string; every known escape returns to Quote.
// NOTE(review): no escape arm records the escaped character here - confirm against the real file.
TokenizerState::QuoteBackslash =>
match ch
'a' =>
*state = TokenizerState::Quote;
'b' =>
*state = TokenizerState::Quote;
't' =>
*state = TokenizerState::Quote;
'n' | '\n' =>
*state = TokenizerState::Quote;
'v' =>
*state = TokenizerState::Quote;
'f' =>
*state = TokenizerState::Quote;
'r' =>
*state = TokenizerState::Quote;
'\\' =>
*state = TokenizerState::Quote;
'"' =>
*state = TokenizerState::Quote;
'\'' =>
*state = TokenizerState::Quote;
'z' =>
*state = TokenizerState::QuoteBackslashZ;
_ => return Err("Unknown escape sequence"),
// After `\z`: stays in this state while whitespace follows; the first
// non-whitespace character returns to Quote.
TokenizerState::QuoteBackslashZ =>
match ch
'\\' =>
*state = TokenizerState::QuoteBackslash;
'"' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::StringLiteral(token_str.clone())), TokenizerState::String),
_ =>
if !ch.is_whitespace()
*state = TokenizerState::Quote;
// Single-quoted string body; mirrors the double-quoted states.
TokenizerState::SingleQuote =>
match ch
'\\' =>
*state = TokenizerState::SingleQuoteBackslash;
'\'' =>
*last_index = *index as i32;
*token = Some(Token::StringLiteral(token_str.clone()));
*state = TokenizerState::String;
// NOTE(review): this arm has no body here; presumably it appends `ch` to `token_str` - confirm.
_ =>
TokenizerState::SingleQuoteBackslash =>
match ch
'a' =>
*state = TokenizerState::SingleQuote;
'b' =>
*state = TokenizerState::SingleQuote;
't' =>
*state = TokenizerState::SingleQuote;
'n' | '\n' =>
*state = TokenizerState::SingleQuote;
'v' =>
*state = TokenizerState::SingleQuote;
'f' =>
*state = TokenizerState::SingleQuote;
'r' =>
*state = TokenizerState::SingleQuote;
'\\' =>
*state = TokenizerState::SingleQuote;
'"' =>
*state = TokenizerState::SingleQuote;
'\'' =>
*state = TokenizerState::SingleQuote;
'z' =>
*state = TokenizerState::SingleQuoteBackslashZ;
_ => return Err("Unknown escape sequence"),
TokenizerState::SingleQuoteBackslashZ =>
match ch
'\\' =>
*state = TokenizerState::SingleQuoteBackslash;
'\'' =>
*last_index = *index as i32;
*token = Some(Token::StringLiteral(token_str.clone()));
*state = TokenizerState::String;
_ =>
if !ch.is_whitespace()
*state = TokenizerState::SingleQuote;
// Closing quote already consumed: emit the literal whatever `ch` is.
TokenizerState::String =>
let content = token_str.clone();
tokenize_backtrack_custom_token(last_index, index, tokens, token, token_str, state, Token::StringLiteral(content))?;
TokenizerState::Name => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
// Numeric literals.
// NOTE(review): in the number states below the digit branch is followed directly by the
// backtrack call; presumably an `else` separates them - confirm.
TokenizerState::Zero =>
match ch
'x' =>
*token = None;
*state = TokenizerState::HexNumberX;
_ =>
if ch.is_numeric() && ch.is_ascii()
*last_index = *index as i32;
*token = Some(Token::IntLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
// After `0x`: at least one hex digit is needed before a token is pending.
TokenizerState::HexNumberX =>
if ch.is_ascii() && ch.is_numeric() || match ch
'A'..='F' | 'a'..='f' => true,
_ => false,
*last_index = *index as i32;
*token = Some(Token::HexLiteral(token_str.clone()));
*state = TokenizerState::HexNumber;
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
TokenizerState::HexNumber =>
match ch
'p' =>
*token = None;
*state = TokenizerState::HexExpNumber;
_ =>
if ch.is_ascii() && ch.is_numeric() || match ch
'A'..='F' | 'a'..='f' => true,
_ => false,
*last_index = *index as i32;
*token = Some(Token::HexLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
TokenizerState::Number =>
match ch
'e' =>
*token = None;
*state = TokenizerState::ExpNumber;
_ =>
if ch.is_numeric() && ch.is_ascii()
*last_index = *index as i32;
*token = Some(Token::IntLiteral(token_str.clone()));
tokenize_backtrack(last_index, index, tokens, token, token_str, state)?;
// Tokens that cannot be extended by another character: emit immediately.
TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed |
TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus |
TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash |
TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen |
TokenizerState::SquareClosed | TokenizerState::Pipe | TokenizerState::Ampersand |
TokenizerState::Percent | TokenizerState::Star | TokenizerState::Semicolon |
TokenizerState::Caret | TokenizerState::DotDotDot | TokenizerState::GtGt |
TokenizerState::LtLt | TokenizerState::SlashSlash => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
// One-character operators that may grow into a two-character operator.
TokenizerState::Tilde =>
match ch
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::TildeEquals), TokenizerState::TildeEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Gt =>
match ch
'>' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtGt), TokenizerState::GtGt),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::GtEquals), TokenizerState::GtEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Lt =>
match ch
// `<<` needs a second `<`; this arm previously matched `>`, so `<>` was
// lexed as LtLt and `<<` as two Lt tokens.
'<' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtLt), TokenizerState::LtLt),
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::LtEquals), TokenizerState::LtEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Slash =>
match ch
'/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::SlashSlash), TokenizerState::SlashSlash),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Dot =>
match ch
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDot), TokenizerState::DotDot),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::DotDot =>
match ch
'.' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::DotDotDot), TokenizerState::DotDotDot),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Colon =>
match ch
':' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::ColonColon), TokenizerState::ColonColon),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
TokenizerState::Equals =>
match ch
'=' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::EqualsEquals), TokenizerState::EqualsEquals),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
// `-` is either the minus operator or, followed by another `-`, the start of a comment.
TokenizerState::Minus =>
match ch
'-' => tokenize_terminal_no_str(last_index, *index, token, state, None, TokenizerState::SmallCommentStart),
_ => tokenize_backtrack(last_index, index, tokens, token, token_str, state)?,
// Just after `--`: `[` may open a long-bracket comment, a newline ends an empty comment.
TokenizerState::SmallCommentStart =>
match ch
'[' =>
*token = None;
*state = TokenizerState::BigCommentLongBracketStart;
'\n' =>
*state = TokenizerState::Start;
*last_index = -1;
_ =>
*state = TokenizerState::SmallComment;
// Line comment: everything up to the newline is discarded.
TokenizerState::SmallComment =>
match ch
'\n' =>
*state = TokenizerState::Start;
*last_index = -1;
_ => { }
// Opening long bracket `--[==[`: count the `=` signs until the second `[`.
TokenizerState::BigCommentLongBracketStart =>
match ch
'=' =>
*long_bracket_level += 1;
'[' =>
*state = TokenizerState::BigComment;
_ => return Err("Malformed long bracket at the beginning of a big comment"),
// Long comment body: only `]` is of interest.
TokenizerState::BigComment =>
match ch
']' =>
*state = TokenizerState::BigCommentLongBracketEnd;
_ => { }
// Closing long bracket: each `=` is counted down and the level must be zero at the final `]`.
// NOTE(review): any other character after a `]` aborts lexing, so a comment body that
// contains a lone `]` cannot be lexed; consider returning to BigComment instead.
TokenizerState::BigCommentLongBracketEnd =>
match ch
'=' =>
if *long_bracket_level == 0
return Err("Long bracket level too big when ending big comment");
*long_bracket_level -= 1;
']' =>
if *long_bracket_level != 0
return Err("Long bracket level too small when ending big comment");
*state = TokenizerState::Start;
_ => return Err("Malformed long bracket when ending big comment"),
// Keyword recognition: one state per keyword prefix. A matching character moves to the
// next prefix state; anything else is handled as an identifier character. A completed
// keyword state emits its keyword token unless the identifier continues.
// and
TokenizerState::A =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::An, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::An =>
match ch
'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::And, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::And => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::And)?,
// while
TokenizerState::W =>
match ch
'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Wh, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Wh =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whi, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Whi =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Whil, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Whil =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::While, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::While => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::While)?,
// break
TokenizerState::B =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Br, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Br =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Bre, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Bre =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Brea, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Brea =>
match ch
'k' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Break, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Break => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Break)?,
// goto
TokenizerState::G =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Go, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Go =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Got, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Got =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Goto, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Goto => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Goto)?,
// return / repeat (shared prefix `re`)
TokenizerState::R =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Re, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Re =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ret, token_str, ch),
'p' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Rep, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Ret =>
match ch
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retu, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Retu =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Retur, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Retur =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Return, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Return => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Return)?,
TokenizerState::Rep =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repe, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repe =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repea, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repea =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Repeat, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Repeat => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Repeat)?,
// nil / not
TokenizerState::N =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Ni, token_str, ch),
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::No, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::No =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Not, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Not => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Not)?,
TokenizerState::Ni =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Nil, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Nil => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Nil)?,
// then / true
TokenizerState::T =>
match ch
'h' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Th, token_str, ch),
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tr, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Th =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::The, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::The =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Then, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Then => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Then)?,
TokenizerState::Tr =>
match ch
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Tru, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Tru =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::True, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::True => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::True)?,
// end / else / elseif
TokenizerState::E =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::El, token_str, ch),
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::En, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::En =>
match ch
'd' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::End, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::End => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::End)?,
TokenizerState::El =>
match ch
's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Els, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Els =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Else, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
// `else` is itself a keyword and also the prefix of `elseif`.
TokenizerState::Else =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elsei, token_str, ch),
_ => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Else)?,
TokenizerState::Elsei =>
match ch
'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Elseif, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Elseif => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Elseif)?,
// or
TokenizerState::O =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Or, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Or => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Or)?,
// do
TokenizerState::D =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Do, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Do => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Do)?,
// if / in
TokenizerState::I =>
match ch
'f' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::If, token_str, ch),
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::In, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::In => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::In)?,
TokenizerState::If => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::If)?,
// false / for / function
TokenizerState::F =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fa, token_str, ch),
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fo, token_str, ch),
'u' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fu, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fu =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fun, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fun =>
match ch
'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Func, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Func =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Funct, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Funct =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functi, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Functi =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Functio, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Functio =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Function, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Function => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Function)?,
TokenizerState::Fa =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fal, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fal =>
match ch
's' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Fals, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Fals =>
match ch
'e' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::False, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::False => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::False)?,
TokenizerState::Fo =>
match ch
'r' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::For, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::For => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::For)?,
// local
TokenizerState::L =>
match ch
'o' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Lo, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Lo =>
match ch
'c' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loc, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Loc =>
match ch
'a' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Loca, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Loca =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Local, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Local => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Local)?,
// until
TokenizerState::U =>
match ch
'n' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Un, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Un =>
match ch
't' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unt, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Unt =>
match ch
'i' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Unti, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Unti =>
match ch
'l' => tokenize_terminal_no_token(last_index, *index, state, TokenizerState::Until, token_str, ch),
_ => tokenize_alphanumeric_nonstart(last_index, index, tokens, token, token_str, state, ch)?,
TokenizerState::Until => tokenize_alphanumeric_nonstart_custom(last_index, index, tokens, token, token_str, state, ch, Token::Until)?,
// States without an arm above (e.g. ExpNumber, HexExpNumber, ColonColon) are not implemented yet.
_ => todo!("State: {:?}", state),
return Ok(());
pub fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
let mut tokens: Vec<Token> = Vec::new();
let mut state = TokenizerState::Start;
let char_vec: Vec<char> = file_content.chars().collect();
let mut last_index: i32 = -1;
let mut index = 0;
let mut token: Option<Token> = None;
let mut token_str: String = String::new();
let mut long_bracket_level = 0;
while index < char_vec.len()
let ch = char_vec[index];
tokenize_char(&mut state, ch, &mut last_index, &mut index, &mut token, &mut token_str, &mut tokens, &mut long_bracket_level)?;
index += 1;
match state
TokenizerState::Name => tokenize_backtrack_name(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state)?,
TokenizerState::End => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::End)?,
TokenizerState::And => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::And)?,
TokenizerState::Semicolon => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::Semicolon)?,
_ => todo!("state: {:?}", state),
return Ok(tokens);
Reference in New Issue
Block a user