Implement more states for the tokenizer
parent 21c5098cf7
commit a149609f03

src/main.rs | 504
@@ -1,4 +1,4 @@
-use std::{env, fs, fmt::Error};
+use std::{env, fs};
 
 fn main()
 {
@@ -51,13 +51,14 @@ enum TokenizerState
 
     An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
     LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
-    SmallComment, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
+    SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
 
     And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
     DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
-    BigComment,
+    BigCommentLongBracketStart, SmallComment,
 
     Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
+    BigComment, BigCommentLongBracketEnd,
 
     Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
 
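These variants spell out every prefix of every Lua keyword (`Re`, `Rep`, `Repe`, `Repea`, `Repeat`, ...), so the state enum doubles as a hand-written trie over the keyword set: each accepted character moves one node deeper, and any other name character falls back to a generic identifier state. A minimal standalone sketch of that shape, with hypothetical names and a single keyword (not part of the commit):

    // Sketch only: a one-keyword prefix automaton in the same style.
    #[derive(Debug, PartialEq)]
    enum S { Start, D, Do, Name }

    fn step(s: S, ch: char) -> S
    {
        match (s, ch)
        {
            (S::Start, 'd') => S::D,
            (S::D, 'o') => S::Do, // full keyword "do" seen
            (S::D, c) | (S::Do, c) if c.is_ascii_alphanumeric() || c == '_' => S::Name,
            _ => S::Start, // delimiter: the caller would emit Do or Name here
        }
    }

    fn main()
    {
        assert_eq!(step(step(S::Start, 'd'), 'o'), S::Do);   // "do"  -> keyword path
        assert_eq!(step(step(S::Start, 'd'), 'g'), S::Name); // "dg…" -> identifier path
        println!("ok");
    }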
@@ -78,6 +79,7 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
     let mut index = 0;
     let mut token: Option<Token> = None;
     let mut token_str: String = String::new();
+    let mut long_bracket_level = 0;
 
     while index < char_vec.len()
     {
@@ -282,6 +284,48 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                         token = Some(Token::Hash);
                         state = TokenizerState::Hash;
                     }
+                    '|' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Pipe);
+                        state = TokenizerState::Pipe;
+                    }
+                    '&' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Ampersand);
+                        state = TokenizerState::Ampersand;
+                    }
+                    '%' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Percent);
+                        state = TokenizerState::Percent;
+                    }
+                    '*' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Star);
+                        state = TokenizerState::Star;
+                    }
+                    '/' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Slash);
+                        state = TokenizerState::Slash;
+                    }
+                    ';' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Semicolon);
+                        state = TokenizerState::Semicolon;
+                    }
+                    '^' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Caret);
+                        state = TokenizerState::Caret;
+                    }
                     '0' =>
                     {
                         last_index = index as i32;
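Each new arm repeats the same three steps: record the current position in `last_index`, stage the one-character token, and enter that token's follow-up state. If more operators are added, the pattern could be collapsed into a helper; a hypothetical sketch (the helper name is invented and not in this commit; it relies on the file's existing `Token` and `TokenizerState` types):

    // Hypothetical refactor of the repeated arm body.
    fn begin_token(last_index: &mut i32, token: &mut Option<Token>,
                   state: &mut TokenizerState, index: usize,
                   tok: Token, next: TokenizerState)
    {
        *last_index = index as i32;
        *token = Some(tok);
        *state = next;
    }

    // e.g.: '|' => begin_token(&mut last_index, &mut token, &mut state,
    //                          index, Token::Pipe, TokenizerState::Pipe),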
@@ -318,7 +362,7 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                     else
                     {
-                        todo!("State {:?}, Char {}, {:?}", state, ch, tokens);
+                        todo!("State {:?}, Char {}", state, ch);
                     }
                 }
             }
@@ -710,7 +754,10 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
             TokenizerState::Comma | TokenizerState::RoundOpen | TokenizerState::RoundClosed |
             TokenizerState::CurlyOpen | TokenizerState::CurlyClosed | TokenizerState::Plus |
             TokenizerState::TildeEquals | TokenizerState::EqualsEquals | TokenizerState::Hash |
-            TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen | TokenizerState::SquareClosed =>
+            TokenizerState::GtEquals | TokenizerState::LtEquals | TokenizerState::SquareOpen |
+            TokenizerState::SquareClosed | TokenizerState::Pipe | TokenizerState::Ampersand |
+            TokenizerState::Percent | TokenizerState::Star | TokenizerState::Semicolon |
+            TokenizerState::Caret =>
             {
                 if last_index == -1 || token.is_none()
                 {
@@ -814,6 +861,74 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                 }
             }
+            TokenizerState::GtGt =>
+            {
+                if last_index == -1 || token.is_none()
+                {
+                    println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                    return Err("Lexerr");
+                }
+                index = last_index as usize;
+                last_index = -1;
+                token = None;
+                token_str.clear();
+                state = TokenizerState::Start;
+                tokens.push(Token::GtGt);
+            }
+            TokenizerState::LtLt =>
+            {
+                if last_index == -1 || token.is_none()
+                {
+                    println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                    return Err("Lexerr");
+                }
+                index = last_index as usize;
+                last_index = -1;
+                token = None;
+                token_str.clear();
+                state = TokenizerState::Start;
+                tokens.push(Token::LtLt);
+            }
+            TokenizerState::Slash =>
+            {
+                match ch
+                {
+                    '/' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::SlashSlash);
+                        state = TokenizerState::SlashSlash;
+                    }
+                    _ =>
+                    {
+                        if last_index == -1 || token.is_none()
+                        {
+                            println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                            return Err("Lexerr");
+                        }
+                        index = last_index as usize;
+                        last_index = -1;
+                        tokens.push(token.clone().unwrap());
+                        token = None;
+                        token_str.clear();
+                        state = TokenizerState::Start;
+                    }
+                }
+            }
+            TokenizerState::SlashSlash =>
+            {
+                if last_index == -1 || token.is_none()
+                {
+                    println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                    return Err("Lexerr");
+                }
+                index = last_index as usize;
+                last_index = -1;
+                token = None;
+                token_str.clear();
+                state = TokenizerState::Start;
+                tokens.push(Token::SlashSlash);
+            }
             TokenizerState::Dot =>
             {
                 match ch
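What these states implement is rewind-based maximal munch: on `/` the machine stages `Token::Slash`; a following `/` upgrades the staged token to `SlashSlash`, while any other character pushes the staged token and resets `index = last_index` so that, after the loop's `index += 1`, the non-matching character is re-scanned from `Start`. A standalone sketch of the same effect using direct lookahead instead of rewinding (simplified; not the commit's code):

    fn main()
    {
        let chars: Vec<char> = "a//b/c".chars().collect();
        let mut i = 0;
        while i < chars.len()
        {
            match chars[i]
            {
                '/' if i + 1 < chars.len() && chars[i + 1] == '/' =>
                {
                    println!("SlashSlash"); // longest match wins
                    i += 1;                 // consume the second '/'
                }
                '/' => println!("Slash"),
                c => println!("Name({})", c),
            }
            i += 1;
        }
        // prints: Name(a), SlashSlash, Name(b), Slash, Name(c)
    }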
@@ -940,7 +1055,7 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     {
                         last_index = index as i32;
                         token = None;
-                        state = TokenizerState::SmallComment;
+                        state = TokenizerState::SmallCommentStart;
                     }
                     _ =>
                     {
@@ -958,15 +1073,14 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                 }
             }
-            TokenizerState::SmallComment =>
+            TokenizerState::SmallCommentStart =>
             {
                 match ch
                 {
                     '[' =>
                     {
                         last_index = index as i32;
-                        token = None;
-                        state = TokenizerState::BigComment;
+                        state = TokenizerState::BigCommentLongBracketStart;
                     }
                     '\n' =>
                     {
@@ -975,10 +1089,71 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     }
                     _ =>
                     {
                         last_index = index as i32;
+                        state = TokenizerState::SmallComment;
                     }
                 }
             }
+            TokenizerState::SmallComment =>
+            {
+                match ch
+                {
+                    '\n' =>
+                    {
+                        state = TokenizerState::Start;
+                        last_index = -1;
+                    }
+                    _ => { }
+                }
+            }
+            TokenizerState::BigCommentLongBracketStart =>
+            {
+                match ch
+                {
+                    '=' =>
+                    {
+                        long_bracket_level += 1;
+                    }
+                    '[' =>
+                    {
+                        state = TokenizerState::BigComment;
+                    }
+                    _ => return Err("Malformed long bracket at the beginning of a big comment"),
+                }
+            }
+            TokenizerState::BigComment =>
+            {
+                match ch
+                {
+                    ']' =>
+                    {
+                        state = TokenizerState::BigCommentLongBracketEnd;
+                    }
+                    _ => { }
+                }
+            }
+            TokenizerState::BigCommentLongBracketEnd =>
+            {
+                match ch
+                {
+                    '=' =>
+                    {
+                        if long_bracket_level == 0
+                        {
+                            return Err("Long bracket level too big when ending big comment");
+                        }
+                        long_bracket_level -= 1;
+                    }
+                    ']' =>
+                    {
+                        if long_bracket_level != 0
+                        {
+                            return Err("Long bracket level too small when ending big comment");
+                        }
+                        state = TokenizerState::Start;
+                    }
+                    _ => return Err("Malformed long bracket when ending big comment"),
+                }
+            }
             TokenizerState::A =>
             {
                 match ch
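Background for these states: Lua block comments open with `--[` + n `=` signs + `[` and close with `]` + the same n `=` signs + `]`, so `--[==[ … ]==]` only closes on `]==]`. `long_bracket_level` counts the `=` run on open and counts it back down on close, which is why a mismatched run is rejected here. A standalone sketch of the matching rule (simplified; the function name is hypothetical):

    // Count the '=' run after "--[" and require the same run in the closer.
    fn long_bracket_level_of(comment: &str) -> Option<usize>
    {
        let open = comment.strip_prefix("--[")?;
        let level = open.chars().take_while(|&c| c == '=').count();
        // the opening bracket must finish with a second '['
        open.chars().nth(level).filter(|&c| c == '[')?;
        // the closer is ']' + the same '=' run + ']'
        let closer = format!("]{}]", "=".repeat(level));
        comment.ends_with(&closer).then_some(level)
    }

    fn main()
    {
        assert_eq!(long_bracket_level_of("--[==[ comment ]==]"), Some(2));
        assert_eq!(long_bracket_level_of("--[==[ comment ]=]"), None);
        println!("ok");
    }

One caveat worth noting: reference Lua treats a non-matching `]=…` run inside the comment as ordinary comment text and keeps scanning, whereas this pass reports an error.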
@@ -1772,6 +1947,141 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     tokens.push(Token::Return);
                 }
             }
+            TokenizerState::Rep =>
+            {
+                match ch
+                {
+                    'e' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("repe".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Repe;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Repe =>
+            {
+                match ch
+                {
+                    'a' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("repea".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Repea;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Repea =>
+            {
+                match ch
+                {
+                    't' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("repeat".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Repeat;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Repeat =>
+            {
+                if ch.is_ascii_alphanumeric() || ch == '_'
+                {
+                    last_index = index as i32;
+                    token_str.push(ch);
+                    token = Some(Token::Name(token_str.clone()));
+                    state = TokenizerState::Name;
+                }
+                else
+                {
+                    if last_index == -1 || token.is_none()
+                    {
+                        println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                        return Err("Lexerr");
+                    }
+                    index = last_index as usize;
+                    last_index = -1;
+                    token = None;
+                    token_str.clear();
+                    state = TokenizerState::Start;
+                    tokens.push(Token::Repeat);
+                }
+            }
             TokenizerState::N =>
             {
                 match ch
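The `Rep` → `Repe` → `Repea` → `Repeat` chain accepts one expected letter per step; any other name character drops to the generic `Name` state, and a non-name character flushes the pending token and rewinds. The terminal state makes the interesting decision: `repeat` followed by a delimiter becomes `Token::Repeat`, while `repeats` keeps growing as `Token::Name("repeats")`. A condensed standalone sketch of that final decision (names hypothetical):

    // Terminal keyword state, condensed: keyword if the next char cannot
    // continue a name, identifier otherwise.
    fn finish_keyword(next: Option<char>) -> &'static str
    {
        match next
        {
            Some(c) if c.is_ascii_alphanumeric() || c == '_' => "Name",
            _ => "Repeat",
        }
    }

    fn main()
    {
        assert_eq!(finish_keyword(Some(' ')), "Repeat");
        assert_eq!(finish_keyword(Some('s')), "Name"); // "repeats" is a name
        assert_eq!(finish_keyword(None), "Repeat");    // end of input
        println!("ok");
    }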
@@ -3334,7 +3644,179 @@ fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
                     tokens.push(Token::Local);
                 }
             }
-            _ => todo!("State {:?} | {:?}", state, tokens)
+            TokenizerState::U =>
+            {
+                match ch
+                {
+                    'n' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("un".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Un;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.clone().unwrap());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Un =>
+            {
+                match ch
+                {
+                    't' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("unt".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Unt;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Unt =>
+            {
+                match ch
+                {
+                    'i' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("unti".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Unti;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Unti =>
+            {
+                match ch
+                {
+                    'l' =>
+                    {
+                        last_index = index as i32;
+                        token = Some(Token::Name("until".to_string()));
+                        token_str.push(ch);
+                        state = TokenizerState::Until;
+                    }
+                    _ =>
+                    {
+                        if ch.is_ascii_alphanumeric() || ch == '_'
+                        {
+                            last_index = index as i32;
+                            token_str.push(ch);
+                            token = Some(Token::Name(token_str.clone()));
+                            state = TokenizerState::Name;
+                        }
+                        else
+                        {
+                            if last_index == -1 || token.is_none()
+                            {
+                                println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                                return Err("Lexerr");
+                            }
+                            index = last_index as usize;
+                            last_index = -1;
+                            tokens.push(token.unwrap().clone());
+                            token = None;
+                            token_str.clear();
+                            state = TokenizerState::Start;
+                        }
+                    }
+                }
+            }
+            TokenizerState::Until =>
+            {
+                if ch.is_ascii_alphanumeric() || ch == '_'
+                {
+                    last_index = index as i32;
+                    token_str.push(ch);
+                    token = Some(Token::Name(token_str.clone()));
+                    state = TokenizerState::Name;
+                }
+                else
+                {
+                    if last_index == -1 || token.is_none()
+                    {
+                        println!("{}|{}|{:?} | {:?}", last_index, index, token, tokens);
+                        return Err("Lexerr");
+                    }
+                    index = last_index as usize;
+                    last_index = -1;
+                    token = None;
+                    token_str.clear();
+                    state = TokenizerState::Start;
+                    tokens.push(Token::Until);
+                }
+            }
+            _ => todo!("State {:?}", state)
         }
         index += 1;
     }
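With these states wired in, source that mixes `repeat`/`until` with both comment forms should now get through the lexer instead of hitting the `todo!` fallback. A hypothetical smoke test against the file's existing `tokenize(&String) -> Result<Vec<Token>, &'static str>` signature (not part of the commit):

    fn smoke_test()
    {
        let src = String::from("repeat i = i + 1 -- step\nuntil i > 10 --[==[ done ]==]");
        match tokenize(&src)
        {
            Ok(tokens) => println!("{:?}", tokens),
            Err(e) => println!("lex error: {}", e),
        }
    }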