Compare commits


5 Commits

SHA1 Message Date
0ca8a731a9 Implement LL-like parser 2023-07-20 15:20:28 +02:00
c3e2565ff9 Start work on a LL inspired parser since CYK is too slow 2023-07-19 12:57:37 +02:00
e53734dcca Add terribly slow cyk parser 2023-07-12 23:25:20 +02:00
5a2171b023 Add grammar 2023-07-11 17:38:26 +02:00
bee2fcb62b Start working on a parser 2023-06-15 06:44:47 +02:00
4 changed files with 1721 additions and 12 deletions

src/grammar.rs Normal file (+480 lines)

@@ -0,0 +1,480 @@
use crate::tokenizer::Token;
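// A grammar rule in Chomsky normal form: a nonterminal either derives a
// single terminal token or exactly two nonterminals. The u8 values index
// into NONTERMINAL_NAMES.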
pub enum Rule
{
Terminal(u8, Token),
NonTerminal(u8, u8, u8)
}
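// Names of the grammar's nonterminals; the u8 ids in the rule tables below
// index into this array. The suffixes follow the normalization scheme:
// `x__N` names are helper nonterminals introduced when binarizing rules,
// `x_?` and `x_*` stand for optional and repeated forms, and `t_non` wraps
// the terminal `t` so it can appear inside a two-nonterminal rule.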
pub const NONTERMINAL_NAMES: [&str; 115] =
[
"stat__15",
"funcbody__50",
"fieldlist",
"fieldlist__1",
"namelist",
"parlist",
">_non",
"chunk",
",_non",
"field__29",
"stat__11",
"stat__45",
"stat__33",
"funcname",
"prefixexp__25",
";_?",
"stat__5",
"stat__38",
"else_non",
"stat__16",
"<_non",
"explist",
"stat_*",
"{_non",
"funcnamedotexpansion_*",
"stat__6",
"funcnamedotexpansion",
"moreattribs_*",
"field__28",
"retstat_?",
"stat__9",
"stat__37",
"field",
"local_non",
")_non",
"morevars_*",
"moreattribs__19",
"morevars",
"forthirdarg_?",
"::_non",
"stat",
"morefields_*",
"]_non",
"[_non",
"assign",
"field__31",
"stat__4",
"do_non",
"unop",
"elseifblocks__18",
"elseblock_?",
"label__21",
"funcbody__51",
"in_non",
"stat__34",
"exp",
"funcnamecolonexpansion_?",
"(_non",
"if_non",
"stat__7",
"attrib__20",
"elseifblocks__17",
"fieldsep_?",
"while_non",
"elseifblocks_*",
"binop",
"..._non",
"then_non",
"return_non",
"._non",
"stat__35",
"prefixexp",
"attnamelist",
"functioncall__26",
"elseifblocks",
"function_non",
"}_non",
"var__22",
"tableconstructor__53",
"varlist",
"S_0",
"Name_non",
"goto_non",
"parlistvarargs_?",
"elseif_non",
"for_non",
"stat__8",
"args",
"stat__41",
"stat__14",
"until_non",
"morenames_*",
"stat__39",
"stat__32",
"end_non",
"attnamelist__46",
"moreexps_*",
"stat__10",
"=_non",
":_non",
"stat__36",
"functioncall__27",
"funcname__48",
"attrib",
"repeat_non",
"args__49",
"var",
"var__23",
"stat__40",
"exp__0",
"stat__42",
"morefields",
"moreattribs",
"funcbody",
"retstat__47",
];
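// Terminal rules (A -> token), as (nonterminal id, token) pairs. For tokens
// that carry a payload (Name, StringLiteral, Numeral) the payload here is a
// placeholder; the CYK initialization in parser.rs matches on the variant
// alone.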
pub const TERMINAL_RULES: [(u8, Token); 125] =
[
(57, Token::RoundOpen),
(34, Token::RoundClosed),
(8, Token::Comma),
(66, Token::DotDotDot),
(69, Token::Dot),
(39, Token::ColonColon),
(99, Token::Colon),
(15, Token::Semicolon),
(20, Token::Lt),
(98, Token::Equals),
(6, Token::Gt),
(81, Token::Name(String::new())),
(80, Token::Return),
(80, Token::Semicolon),
(80, Token::Break),
(43, Token::SquareOpen),
(42, Token::SquareClosed),
(87, Token::StringLiteral(String::new())),
(105, Token::RoundClosed),
(72, Token::Name(String::new())),
(65, Token::Plus),
(65, Token::Minus),
(65, Token::Star),
(65, Token::Slash),
(65, Token::SlashSlash),
(65, Token::Caret),
(65, Token::Percent),
(65, Token::Ampersand),
(65, Token::Pipe),
(65, Token::GtGt),
(65, Token::LtLt),
(65, Token::DotDot),
(65, Token::Lt),
(65, Token::LtEquals),
(65, Token::Gt),
(65, Token::GtEquals),
(65, Token::EqualsEquals),
(65, Token::TildeEquals),
(65, Token::And),
(65, Token::Or),
(7, Token::Return),
(7, Token::Semicolon),
(7, Token::Break),
(47, Token::Do),
(18, Token::Else),
(50, Token::Else),
(84, Token::Elseif),
(49, Token::Then),
(94, Token::End),
(55, Token::Nil),
(55, Token::False),
(55, Token::True),
(55, Token::Numeral(String::new())),
(55, Token::StringLiteral(String::new())),
(55, Token::DotDotDot),
(55, Token::Name(String::new())),
(21, Token::Nil),
(21, Token::False),
(21, Token::True),
(21, Token::Numeral(String::new())),
(21, Token::StringLiteral(String::new())),
(21, Token::DotDotDot),
(21, Token::Name(String::new())),
(32, Token::Nil),
(32, Token::False),
(32, Token::True),
(32, Token::Numeral(String::new())),
(32, Token::StringLiteral(String::new())),
(32, Token::DotDotDot),
(32, Token::Name(String::new())),
(2, Token::Nil),
(2, Token::False),
(2, Token::True),
(2, Token::Numeral(String::new())),
(2, Token::StringLiteral(String::new())),
(2, Token::DotDotDot),
(2, Token::Name(String::new())),
(3, Token::Comma),
(3, Token::Semicolon),
(62, Token::Comma),
(62, Token::Semicolon),
(85, Token::For),
(13, Token::Name(String::new())),
(75, Token::Function),
(82, Token::Goto),
(58, Token::If),
(53, Token::In),
(33, Token::Local),
(36, Token::Name(String::new())),
(4, Token::Name(String::new())),
(5, Token::DotDotDot),
(5, Token::Name(String::new())),
(71, Token::Name(String::new())),
(104, Token::Repeat),
(29, Token::Return),
(114, Token::Semicolon),
(114, Token::Nil),
(114, Token::False),
(114, Token::True),
(114, Token::Numeral(String::new())),
(114, Token::StringLiteral(String::new())),
(114, Token::DotDotDot),
(114, Token::Name(String::new())),
(68, Token::Return),
(40, Token::Semicolon),
(40, Token::Break),
(22, Token::Semicolon),
(22, Token::Break),
(54, Token::End),
(70, Token::End),
(100, Token::End),
(11, Token::Name(String::new())),
(25, Token::End),
(78, Token::CurlyClosed),
(67, Token::Then),
(48, Token::Minus),
(48, Token::Not),
(48, Token::Hash),
(48, Token::Tilde),
(90, Token::Until),
(106, Token::Name(String::new())),
(79, Token::Name(String::new())),
(63, Token::While),
(23, Token::CurlyOpen),
(76, Token::CurlyClosed),
];
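// Binary rules (A -> B C), encoded as (a, b, c) ids into NONTERMINAL_NAMES.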
pub const NONTERMINAL_RULES: [(u8, u8, u8); 219] =
[
(80, 22, 29),
(80, 68, 114),
(80, 40, 22),
(80, 82, 81),
(80, 79, 44),
(80, 47, 25),
(80, 63, 46),
(80, 104, 59),
(80, 85, 30),
(80, 75, 89),
(80, 33, 0),
(80, 58, 93),
(80, 85, 31),
(80, 33, 11),
(80, 71, 87),
(80, 71, 73),
(80, 39, 51),
(87, 57, 105),
(87, 23, 78),
(105, 21, 34),
(44, 98, 21),
(72, 81, 95),
(95, 103, 27),
(95, 20, 60),
(95, 112, 27),
(95, 8, 36),
(103, 20, 60),
(60, 81, 6),
(7, 22, 29),
(7, 68, 114),
(7, 40, 22),
(7, 82, 81),
(7, 79, 44),
(7, 47, 25),
(7, 63, 46),
(7, 104, 59),
(7, 85, 30),
(7, 75, 89),
(7, 33, 0),
(7, 58, 93),
(7, 85, 31),
(7, 33, 11),
(7, 71, 87),
(7, 71, 73),
(7, 39, 51),
(50, 18, 7),
(74, 84, 61),
(64, 74, 64),
(64, 84, 61),
(61, 55, 49),
(49, 67, 7),
(55, 48, 55),
(55, 55, 109),
(55, 75, 113),
(55, 23, 78),
(55, 57, 14),
(55, 71, 77),
(55, 71, 26),
(55, 71, 87),
(55, 71, 73),
(109, 65, 55),
(21, 55, 96),
(21, 48, 55),
(21, 55, 109),
(21, 75, 113),
(21, 23, 78),
(21, 57, 14),
(21, 71, 77),
(21, 71, 26),
(21, 71, 87),
(21, 71, 73),
(32, 43, 28),
(32, 81, 45),
(32, 48, 55),
(32, 55, 109),
(32, 75, 113),
(32, 23, 78),
(32, 57, 14),
(32, 71, 77),
(32, 71, 26),
(32, 71, 87),
(32, 71, 73),
(28, 55, 9),
(9, 42, 45),
(45, 98, 55),
(2, 32, 3),
(2, 43, 28),
(2, 81, 45),
(2, 48, 55),
(2, 55, 109),
(2, 75, 113),
(2, 23, 78),
(2, 57, 14),
(2, 71, 77),
(2, 71, 26),
(2, 71, 87),
(2, 71, 73),
(3, 41, 62),
(3, 111, 41),
(3, 62, 32),
(38, 8, 55),
(113, 57, 1),
(1, 5, 52),
(1, 34, 25),
(52, 34, 25),
(13, 81, 102),
(102, 24, 56),
(102, 26, 24),
(102, 69, 81),
(102, 99, 81),
(56, 99, 81),
(26, 69, 81),
(24, 26, 24),
(24, 69, 81),
(73, 99, 101),
(101, 81, 87),
(51, 81, 39),
(112, 8, 36),
(27, 112, 27),
(27, 8, 36),
(36, 81, 103),
(96, 38, 96),
(96, 8, 55),
(111, 62, 32),
(41, 111, 41),
(41, 62, 32),
(91, 37, 91),
(91, 8, 81),
(37, 8, 81),
(35, 37, 35),
(35, 8, 81),
(4, 81, 91),
(5, 4, 83),
(5, 81, 91),
(83, 8, 66),
(71, 57, 14),
(71, 71, 77),
(71, 71, 26),
(71, 71, 87),
(71, 71, 73),
(14, 55, 34),
(29, 68, 114),
(114, 21, 15),
(114, 55, 96),
(114, 48, 55),
(114, 55, 109),
(114, 75, 113),
(114, 23, 78),
(114, 57, 14),
(114, 71, 77),
(114, 71, 26),
(114, 71, 87),
(114, 71, 73),
(40, 82, 81),
(40, 79, 44),
(40, 47, 25),
(40, 63, 46),
(40, 104, 59),
(40, 85, 30),
(40, 75, 89),
(40, 33, 0),
(40, 58, 93),
(40, 85, 31),
(40, 33, 11),
(40, 71, 87),
(40, 71, 73),
(40, 39, 51),
(22, 40, 22),
(22, 82, 81),
(22, 79, 44),
(22, 47, 25),
(22, 63, 46),
(22, 104, 59),
(22, 85, 30),
(22, 75, 89),
(22, 33, 0),
(22, 58, 93),
(22, 85, 31),
(22, 33, 11),
(22, 71, 87),
(22, 71, 73),
(22, 39, 51),
(97, 53, 10),
(10, 21, 16),
(89, 13, 113),
(0, 75, 19),
(19, 81, 113),
(93, 55, 12),
(12, 67, 54),
(54, 7, 70),
(54, 64, 100),
(54, 50, 94),
(70, 64, 100),
(70, 50, 94),
(100, 50, 94),
(31, 81, 17),
(17, 98, 92),
(92, 55, 108),
(46, 55, 16),
(108, 8, 88),
(88, 55, 110),
(110, 38, 16),
(110, 47, 25),
(11, 72, 44),
(11, 81, 95),
(16, 47, 25),
(25, 7, 94),
(59, 7, 86),
(59, 90, 55),
(86, 90, 55),
(30, 4, 97),
(78, 2, 76),
(106, 71, 77),
(106, 71, 26),
(77, 43, 107),
(107, 55, 42),
(79, 106, 35),
(79, 71, 77),
(79, 71, 26),
];

src/main.rs

@@ -1,8 +1,10 @@
 pub mod tokenizer;
+pub mod parser;
+pub mod grammar;
 use std::{env, fs};
-use crate::tokenizer::{Token, tokenize};
+use crate::{tokenizer::{Token, tokenize}, parser::parse};
 fn main()
 {
@@ -23,5 +25,7 @@ fn compile(file_content: &String) -> Result<(), &'static str>
 {
 let tokens: Vec<Token> = tokenize(&file_content)?;
 println!("{:?}", tokens);
+let node = parse(tokens)?;
+println!("{:?}", node);
 return Ok(());
 }

src/parser.rs Normal file (+1201 lines)

@@ -0,0 +1,1201 @@
use crate::tokenizer::Token;
use crate::grammar::{NONTERMINAL_NAMES, NONTERMINAL_RULES, TERMINAL_RULES};
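// Hand-written LL-style recursive descent parser. Every parse_* function
// reads from `tokens` at cursor `*i` and advances the cursor past whatever
// it consumed. The CYK recognizer further down is the earlier, slower
// approach (see commit history) and is kept alongside it.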
pub fn parse(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
return own(tokens);
}
fn own(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
return parse_chunk(&tokens, &mut 0);
}
#[derive(Debug)]
pub struct ChunkNode
{
block: BlockNode
}
#[derive(Debug)]
pub struct BlockNode
{
stats: Vec<StatNode>,
retstat: Option<RetstatNode>
}
#[derive(Debug)]
pub enum StatNode
{
Semicolon,
Assignment { lhs: VarlistNode, rhs: ExplistNode },
Functioncall(FunctioncallNode),
Label(String),
Break,
Goto(String),
Do(BlockNode),
While { condition: ExpNode, body: BlockNode },
Repeat { condition: ExpNode, body: BlockNode },
If { condition: ExpNode, body: BlockNode, elseifs: Vec<ElseifNode>, else_: Option<BlockNode> },
ForEq { var: String, start: ExpNode, end: ExpNode, change: Option<ExpNode>, body: BlockNode },
ForIn { vars: Vec<String>, exps: ExplistNode, body: BlockNode },
Function { name: FuncnameNode, body: FuncbodyNode },
LocalFunction { name: String, body: FuncbodyNode },
Local { attnames: AttnamelistNode, values: Option<ExplistNode> }
}
#[derive(Debug)]
pub struct RetstatNode
{
values: Option<ExplistNode>
}
#[derive(Debug)]
pub enum ExpNode
{
Nil,
False,
True,
Numeral(f64),
LiteralString(String),
Varargs,
Functiondef(FuncbodyNode),
Suffixexp(Box<SuffixexpNode>),
Tableconstructor(TableconstructorNode),
Unop(UnopType, Box<ExpNode>),
Binop { lhs: Box<ExpNode>, op: BinopType, rhs: Box<ExpNode> }
}
#[derive(Debug)]
pub enum UnopType
{
Minus, LogicalNot, Length, BinaryNot,
}
#[derive(Debug)]
pub enum BinopType
{
LogicalOr,
LogicalAnd,
Lt, Gt, LtEquals, GtEquals, NotEquals, Equals,
BinaryOr,
BinaryNot,
BinaryAnd,
Shl, Shr,
Concat,
Add, Sub,
Mul, Div, IntDiv, Mod,
Exp,
}
#[derive(Debug)]
pub struct ExplistNode
{
exps: Vec<ExpNode>
}
#[derive(Debug)]
pub struct TableconstructorNode
{
exps: Option<FieldlistNode>
}
#[derive(Debug)]
pub struct FieldlistNode
{
exps: Vec<FieldNode>
}
#[derive(Debug)]
pub enum FieldNode
{
IndexedAssignment { index: ExpNode, rhs: ExpNode },
Assignment { lhs: String, rhs: ExpNode },
Exp(ExpNode),
}
#[derive(Debug)]
pub struct VarlistNode
{
vars: Vec<VarNode>
}
#[derive(Debug)]
pub struct FunctioncallNode
{
function: SuffixexpNode,
object_arg: Option<String>,
args: ArgsNode,
}
#[derive(Debug)]
pub enum ArgsNode
{
Bracketed(Option<ExplistNode>),
Tableconstructor(TableconstructorNode),
Literal(String),
}
#[derive(Debug)]
pub struct ElseifNode
{
condition: ExpNode,
body: BlockNode,
}
#[derive(Debug)]
pub struct FuncnameNode
{
name: String,
dotted_names: Vec<String>,
first_arg: Option<String>,
}
#[derive(Debug)]
pub struct ParlistNode
{
names: Vec<String>,
has_varargs: bool,
}
#[derive(Debug)]
pub struct FuncbodyNode
{
pars: Option<ParlistNode>,
body: BlockNode,
}
#[derive(Debug)]
pub struct AttnamelistNode
{
attnames: Vec<AttnameNode>
}
#[derive(Debug)]
pub struct AttnameNode
{
name: String,
attribute: Option<String>,
}
#[derive(Debug)]
pub enum VarNode
{
Name(String),
Indexed { value: SuffixexpNode, index: ExpNode },
Member { value: SuffixexpNode, name: String }
}
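// A suffix expression is a primary expression (a name or a parenthesized
// expression) followed by any number of suffixes: `.Name`, `[exp]`,
// `:Name args`, or plain `args`. Statements starting with a name or '(' are
// parsed as a suffix expression first and only then classified as an
// assignment or a function call by inspecting the trailing suffix.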
#[derive(Debug)]
pub struct SuffixexpNode
{
first_part: SuffixexpFirstPart,
suffixes: Vec<SuffixexpSuffix>,
}
#[derive(Debug)]
pub enum SuffixexpFirstPart // a:b:test() => a:b.test(b) => a.b.test(a, b)
{
Name(String),
BracketedExpr(ExpNode),
}
#[derive(Debug)]
pub enum SuffixexpSuffix
{
Dot(String),
Indexed(ExpNode),
Args(ArgsNode),
ArgsFirstArg(String, ArgsNode),
}
fn parse_chunk(tokens: &Vec<Token>, i: &mut usize) -> Result<ChunkNode, &'static str>
{
return Ok(ChunkNode { block: parse_block(tokens, i)? });
}
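// A block's statement list ends at end of input or at a token that cannot
// start another statement: 'return' begins the optional trailing retstat,
// while 'end', 'elseif' and 'else' belong to the enclosing construct.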
fn parse_block(tokens: &Vec<Token>, i: &mut usize) -> Result<BlockNode, &'static str>
{
let mut stats: Vec<StatNode> = Vec::new();
while *i < tokens.len() && tokens[*i] != Token::Return && tokens[*i] != Token::End && tokens[*i] != Token::Elseif &&
tokens[*i] != Token::Else
{
stats.push(parse_stat(tokens, i)?);
}
let retstat =
if *i < tokens.len() && tokens[*i] == Token::Return { Some(parse_retstat(tokens, i)?) }
else { None };
return Ok(BlockNode { stats, retstat });
}
fn parse_stat(tokens: &Vec<Token>, i: &mut usize) -> Result<StatNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of file while parsing stat");
}
match tokens[*i]
{
Token::Semicolon =>
{
*i += 1;
Ok(StatNode::Semicolon)
}
Token::Break =>
{
*i += 1;
Ok(StatNode::Break)
}
Token::Goto =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of stream but expected name for goto");
}
return if let Token::Name(name) = &tokens[*i]
{
*i += 1;
Ok(StatNode::Goto(name.clone()))
}
else
{
Err("Expecting name for goto")
};
}
Token::Do =>
{
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Missing 'end' for do block");
}
*i += 1;
return Ok(StatNode::Do(body));
}
Token::While =>
{
*i += 1;
let condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after while condition")
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Missing 'end' for do block");
}
*i += 1;
return Ok(StatNode::While { condition, body });
}
Token::Repeat =>
{
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Until
{
return Err("Expected 'until' after repeat body");
}
*i += 1;
return Ok(StatNode::Repeat { condition: parse_exp(tokens, i)?, body });
}
Token::If =>
{
*i += 1;
let condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Then
{
return Err("Expected 'then' after if condition");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing if");
}
let mut elseifs: Vec<ElseifNode> = Vec::new();
while *i < tokens.len() && tokens[*i] == Token::Elseif
{
*i += 1;
let elseif_condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Then
{
return Err("Expected 'then' after elseif condition");
}
*i += 1;
elseifs.push(ElseifNode { condition: elseif_condition, body: parse_block(tokens, i)? });
}
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing if");
}
let else_ = if tokens[*i] == Token::Else
{
*i += 1;
Some(parse_block(tokens, i)?)
}
else
{
None
};
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' to close if");
}
*i += 1;
return Ok(StatNode::If { condition, body, elseifs, else_ });
}
Token::For =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing for");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing for after first name");
}
match tokens[*i]
{
Token::Equals =>
{
*i += 1;
let start = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Comma
{
return Err("Expected ',' after 'for eq' start value");
}
*i += 1;
let end = parse_exp(tokens, i)?;
if *i >= tokens.len()
{
return Err("Reached end of tokens after end value in 'for eq'");
}
let change = if tokens[*i] == Token::Comma
{
*i += 1;
Some(parse_exp(tokens, i)?)
}
else
{
None
};
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after 'for eq' head");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' to close 'for eq'");
}
*i += 1;
return Ok(StatNode::ForEq { var: name.clone(), start, end, change, body });
}
Token::Comma =>
{
let mut names = Vec::from([name.clone()]);
while tokens[*i] == Token::Comma
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing 'for in' namelist");
}
if let Token::Name(next_name) = &tokens[*i]
{
names.push(next_name.clone());
}
else
{
return Err("Expected another name in 'for in' namelist");
}
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing 'for in' namelist after name");
}
}
if tokens[*i] != Token::In
{
return Err("Expected 'in' after 'for in' namelist");
}
*i += 1;
let exps = parse_explist(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after 'for in' explist");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' after 'for in' body");
}
*i += 1;
return Ok(StatNode::ForIn { vars: names, exps, body });
}
_ => Err("Unexpected token after first name in for")
}
}
else
{
return Err("Expected name after 'for'");
}
}
Token::Function =>
{
*i += 1;
let funcname = parse_funcname(tokens, i)?;
return Ok(StatNode::Function { name: funcname, body: parse_funcbody(tokens, i)? });
}
Token::Local =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing local");
}
if tokens[*i] == Token::Function
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing local function");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
return Ok(StatNode::LocalFunction { name: name.clone(), body: parse_funcbody(tokens, i)? });
}
else
{
return Err("Expected local function name");
}
}
let attnames = parse_attnamelist(tokens, i)?;
let initials = if *i < tokens.len() && tokens[*i] == Token::Equals
{
*i += 1;
Some(parse_explist(tokens, i)?)
}
else
{
None
};
return Ok(StatNode::Local { attnames, values: initials });
}
Token::ColonColon =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing label");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
if *i >= tokens.len() || tokens[*i] != Token::ColonColon
{
return Err("Expected '::' after name in label declaration");
}
*i += 1;
return Ok(StatNode::Label(name.clone()));
}
else
{
return Err("Expected a name after '::' in label declaration")
}
}
Token::Name(_) | Token::RoundOpen =>
{
// assignment or functioncall
let suffix_expression = parse_suffixexp(tokens, i)?;
if *i >= tokens.len()
{
return Ok(StatNode::Functioncall(suffix_expression_to_functioncall(suffix_expression)?));
}
match tokens[*i]
{
Token::Equals =>
{
*i += 1;
return Ok(StatNode::Assignment { lhs: VarlistNode { vars: Vec::from([suffix_expression_to_var(suffix_expression)?]) }, rhs: parse_explist(tokens, i)? });
}
Token::Comma =>
{
let mut vars = Vec::from([suffix_expression_to_var(suffix_expression)?]);
while *i < tokens.len() && tokens[*i] == Token::Comma
{
*i += 1;
vars.push(parse_var(tokens, i)?);
}
if *i >= tokens.len() || tokens[*i] != Token::Equals
{
return Err("Expected '=' for assignment");
}
*i += 1;
return Ok(StatNode::Assignment { lhs: VarlistNode { vars }, rhs: parse_explist(tokens, i)? });
}
_ =>
{
if suffix_expression.suffixes.is_empty()
{
println!("{:?} {} {:?}", tokens[*i], i, suffix_expression);
return Err("Expected function call but suffix is empty");
}
if let Some(SuffixexpSuffix::Args(_)) = suffix_expression.suffixes.last()
{
return Ok(StatNode::Functioncall(suffix_expression_to_functioncall(suffix_expression)?));
}
if let Some(SuffixexpSuffix::ArgsFirstArg(_, _)) = suffix_expression.suffixes.last()
{
return Ok(StatNode::Functioncall(suffix_expression_to_functioncall(suffix_expression)?));
}
else
{
println!("{:?} {} {:?}", tokens[*i], i, suffix_expression.suffixes.last());
return Err("Expected function call");
}
}
}
}
_ =>
{
println!("{:?} {:?} {:?}", tokens[*i - 2], tokens[*i - 1], tokens[*i]);
Err("Unexpected token while parsing stat")
}
}
}
fn suffix_expression_to_functioncall(suffixexp: SuffixexpNode) -> Result<FunctioncallNode, &'static str>
{
let mut new_suffixexp = suffixexp;
let last = new_suffixexp.suffixes.pop();
return match last
{
Some(SuffixexpSuffix::Args(args)) => Ok(FunctioncallNode { function: new_suffixexp, object_arg: None, args }),
Some(SuffixexpSuffix::ArgsFirstArg(first_arg, args)) => Ok(FunctioncallNode { function: new_suffixexp, object_arg: Some(first_arg), args }),
_ => Err("Cannot convert suffixexp to functioncall"),
};
}
fn suffix_expression_to_var(suffixexp: SuffixexpNode) -> Result<VarNode, &'static str>
{
if suffixexp.suffixes.is_empty()
{
return if let SuffixexpFirstPart::Name(name) = suffixexp.first_part
{
Ok(VarNode::Name(name.clone()))
}
else
{
Err("Can only convert suffix exp without suffix to var if its first part is a name")
};
}
let mut new_suffixexp = suffixexp;
let last = new_suffixexp.suffixes.pop();
return match last
{
Some(SuffixexpSuffix::Dot(name)) => Ok(VarNode::Member { value: new_suffixexp, name }),
Some(SuffixexpSuffix::Indexed(index)) => Ok(VarNode::Indexed { value: new_suffixexp, index }),
_ => Err("Cannot convert suffixexp to var"),
};
}
fn parse_var(tokens: &Vec<Token>, i: &mut usize) -> Result<VarNode, &'static str>
{
todo!()
}
fn parse_args(tokens: &Vec<Token>, i: &mut usize) -> Result<ArgsNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing args");
}
match &tokens[*i]
{
Token::RoundOpen =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while paring bracketed args");
}
if tokens[*i] == Token::RoundClosed
{
*i += 1;
return Ok(ArgsNode::Bracketed(None));
}
let exps = parse_explist(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::RoundClosed
{
println!("|{:?}|{}|{:?}|", tokens[*i], i, exps);
return Err("Expected ')' to close bracketed args");
}
*i += 1;
return Ok(ArgsNode::Bracketed(Some(exps)));
}
Token::CurlyOpen =>
{
return Ok(ArgsNode::Tableconstructor(parse_tableconstructor(tokens, i)?));
}
Token::StringLiteral(name) =>
{
*i += 1;
return Ok(ArgsNode::Literal(name.clone()));
}
_ => return Err("Unexpected token while parsing args")
}
}
fn parse_suffixexp(tokens: &Vec<Token>, i: &mut usize) -> Result<SuffixexpNode, &'static str>
{
// primaryexp { '.' 'Name' | '[' exp']' | ':' 'Name' args | args }
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing suffixexp");
}
let first_part = match &tokens[*i]
{
Token::Name(name) =>
{
*i += 1;
SuffixexpFirstPart::Name(name.clone())
},
Token::RoundOpen =>
{
*i += 1;
let ret = SuffixexpFirstPart::BracketedExpr(parse_exp(tokens, i)?);
if *i >= tokens.len() || tokens[*i] != Token::RoundClosed
{
return Err("Expected ')' to close bracketed primary expression");
}
*i += 1;
ret
}
_ => return Err("Unexpected token as first part of suffixexp")
};
let mut suffixes = Vec::new();
while *i < tokens.len()
{
match tokens[*i]
{
Token::Dot =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens but expected name for dotted suffix expression");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
suffixes.push(SuffixexpSuffix::Dot(name.clone()));
}
else
{
return Err("Expected name for dotted suffix expression");
}
}
Token::SquareOpen =>
{
*i += 1;
suffixes.push(SuffixexpSuffix::Indexed(parse_exp(tokens, i)?));
if *i >= tokens.len() || tokens[*i] != Token::SquareClosed
{
return Err("Expected ']' to close indexed suffix expression");
}
*i += 1;
}
Token::Colon =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens but expected name for dotted suffix expression");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
suffixes.push(SuffixexpSuffix::ArgsFirstArg(name.clone(), parse_args(tokens, i)?));
}
else
{
return Err("Expected name for dotted suffix expression");
}
}
Token::RoundOpen | Token::CurlyOpen | Token::StringLiteral(_) =>
{
suffixes.push(SuffixexpSuffix::Args(parse_args(tokens, i)?));
}
_ => break,
}
}
return Ok(SuffixexpNode { first_part, suffixes });
}
fn parse_retstat(tokens: &Vec<Token>, i: &mut usize) -> Result<RetstatNode, &'static str>
{
if *i >= tokens.len() || tokens[*i] != Token::Return
{
return Err("Expected 'return' to start retstat");
}
*i += 1;
if *i >= tokens.len() || tokens[*i] == Token::Semicolon || tokens[*i] == Token::Else || tokens[*i] == Token::Elseif ||
tokens[*i] == Token::End
{
if *i < tokens.len() && tokens[*i] == Token::Semicolon
{
*i += 1;
}
return Ok(RetstatNode { values: None });
}
let values = parse_explist(tokens, i)?;
if *i < tokens.len() && tokens[*i] == Token::Semicolon
{
*i += 1;
}
return Ok(RetstatNode { values: Some(values) });
}
fn parse_exp(tokens: &Vec<Token>, i: &mut usize) -> Result<ExpNode, &'static str>
{
let lhs = parse_exp_primary(tokens, i)?;
return parse_exp_precedence(tokens, i, lhs, 0);
}
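// Binding powers for binary operators; a larger value binds tighter. The
// ordering mirrors Lua's operator precedence table
// (or < and < comparisons < | < ~ < & < shifts < .. < + - < * / // % < ^).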
fn get_precedence(token: &Token) -> Result<u8, &'static str>
{
match token
{
Token::Or => Ok(2),
Token::And => Ok(4),
Token::Lt | Token::Gt | Token::LtEquals | Token::GtEquals | Token::TildeEquals | Token::EqualsEquals => Ok(6),
Token::Pipe => Ok(8),
Token::Tilde => Ok(10),
Token::Ampersand => Ok(12),
Token::LtLt | Token::GtGt => Ok(14),
Token::DotDot => Ok(16),
Token::Plus | Token::Minus => Ok(18),
Token::Star | Token::Slash | Token::SlashSlash | Token::Percent => Ok(20),
Token::Caret => Ok(22),
_ => Err("Tried to get precedence for unknown operator"),
}
}
fn get_binop(token: &Token) -> Result<BinopType, &'static str>
{
match token
{
Token::Or => Ok(BinopType::LogicalOr),
Token::And => Ok(BinopType::LogicalAnd),
Token::Lt => Ok(BinopType::Lt),
Token::Gt => Ok(BinopType::Gt),
Token::LtEquals => Ok(BinopType::LtEquals),
Token::GtEquals => Ok(BinopType::GtEquals),
Token::TildeEquals => Ok(BinopType::NotEquals),
Token::EqualsEquals => Ok(BinopType::Equals),
Token::Pipe => Ok(BinopType::BinaryOr),
Token::Tilde => Ok(BinopType::BinaryNot),
Token::Ampersand => Ok(BinopType::BinaryAnd),
Token::DotDot => Ok(BinopType::Concat),
Token::Plus => Ok(BinopType::Add),
Token::Minus => Ok(BinopType::Sub),
Token::Star => Ok(BinopType::Mul),
Token::Slash => Ok(BinopType::Div),
Token::SlashSlash => Ok(BinopType::IntDiv),
Token::Percent => Ok(BinopType::Mod),
Token::Caret => Ok(BinopType::Exp),
_ =>
{
println!("{:?}", token);
Err("Tried to get binop type for unknown operator")
}
}
}
fn is_binop(token: &Token) -> bool
{
match token
{
Token::Or | Token::And | Token::Lt | Token::Gt | Token::LtEquals | Token::GtEquals | Token::TildeEquals | Token::EqualsEquals |
Token::Pipe | Token::Tilde | Token::Ampersand | Token::LtLt | Token::GtGt | Token::DotDot | Token::Plus | Token::Minus |
Token::Star | Token::Slash | Token::SlashSlash | Token::Percent | Token::Caret =>
{
true
}
_ => false
}
}
fn is_right_associative(token: &Token) -> bool
{
return token == &Token::DotDot || token == &Token::Caret;
}
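// Precedence climbing: fold operators at or above min_precedence into lhs,
// recursing into the right-hand side while the upcoming operator binds
// tighter (or equally tightly for the right-associative '..' and '^').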
fn parse_exp_precedence(tokens: &Vec<Token>, i: &mut usize, lhs: ExpNode, min_precedence: u8) -> Result<ExpNode, &'static str>
{
let mut lhs = lhs;
while *i < tokens.len() && is_binop(&tokens[*i])
{
let precedence = get_precedence(&tokens[*i])?;
if precedence < min_precedence
{
break;
}
let op = get_binop(&tokens[*i])?;
*i += 1;
let mut rhs = parse_exp_primary(tokens, i)?;
while *i < tokens.len() && is_binop(&tokens[*i]) && (get_precedence(&tokens[*i])? > precedence ||
(get_precedence(&tokens[*i])? == precedence && is_right_associative(&tokens[*i])))
{
rhs = parse_exp_precedence(tokens, i, rhs, precedence + if precedence == get_precedence(&tokens[*i])? {0} else {1})?;
}
lhs = ExpNode::Binop { lhs: Box::new(lhs), op, rhs: Box::new(rhs) };
}
return Ok(lhs);
}
fn parse_exp_primary(tokens: &Vec<Token>, i: &mut usize) -> Result<ExpNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of tokens but expected primary expression");
}
match &tokens[*i]
{
Token::Nil =>
{
*i += 1;
Ok(ExpNode::Nil)
},
Token::True =>
{
*i += 1;
Ok(ExpNode::True)
},
Token::False =>
{
*i += 1;
Ok(ExpNode::False)
},
Token::Numeral(number_str) =>
{
*i += 1;
Ok(ExpNode::Numeral(number_str.parse::<f64>().map_err(|_| "Could not parse number")?))
},
Token::StringLiteral(string) =>
{
*i += 1;
Ok(ExpNode::LiteralString(string.clone()))
},
Token::DotDotDot =>
{
*i += 1;
Ok(ExpNode::Varargs)
},
Token::Function =>
{
*i += 1;
Ok(ExpNode::Functiondef(parse_funcbody(tokens, i)?))
}
Token::CurlyOpen => Ok(ExpNode::Tableconstructor(parse_tableconstructor(tokens, i)?)),
Token::Minus =>
{
*i += 1;
Ok(ExpNode::Unop(UnopType::Minus, Box::new(parse_exp(tokens, i)?)))
}
Token::Hash =>
{
*i += 1;
Ok(ExpNode::Unop(UnopType::Length, Box::new(parse_exp(tokens, i)?)))
}
Token::Not =>
{
*i += 1;
Ok(ExpNode::Unop(UnopType::LogicalNot, Box::new(parse_exp(tokens, i)?)))
}
Token::Tilde =>
{
*i += 1;
Ok(ExpNode::Unop(UnopType::BinaryNot, Box::new(parse_exp(tokens, i)?)))
}
_ => Ok(ExpNode::Suffixexp(Box::new(parse_suffixexp(tokens, i)?))),
}
}
fn parse_tableconstructor(tokens: &Vec<Token>, i: &mut usize) -> Result<TableconstructorNode, &'static str>
{
todo!()
}
fn parse_explist(tokens: &Vec<Token>, i: &mut usize) -> Result<ExplistNode, &'static str>
{
let mut exps: Vec<ExpNode> = Vec::from([parse_exp(tokens, i)?]);
while *i < tokens.len() && tokens[*i] == Token::Comma
{
*i += 1;
exps.push(parse_exp(tokens, i)?);
}
return Ok(ExplistNode { exps });
}
fn parse_funcname(tokens: &Vec<Token>, i: &mut usize) -> Result<FuncnameNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing funcname");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
let mut dotted_names = Vec::new();
while *i < tokens.len() && tokens[*i] == Token::Dot
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing dotted part of funcname");
}
if let Token::Name(dotted_name) = &tokens[*i]
{
*i += 1;
dotted_names.push(dotted_name.clone());
}
else
{
return Err("Expected name in dotted funcname");
}
}
let first_arg = if *i < tokens.len() && tokens[*i] == Token::Colon
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing funcname first arg");
}
if let Token::Name(arg_name) = &tokens[*i]
{
*i += 1;
Some(arg_name.clone())
}
else
{
return Err("Expected name of first arg in funcname");
}
}
else
{
None
};
return Ok(FuncnameNode { name: name.clone(), dotted_names, first_arg });
}
else
{
return Err("Expected func name");
}
}
fn parse_funcbody(tokens: &Vec<Token>, i: &mut usize) -> Result<FuncbodyNode, &'static str>
{
if *i >= tokens.len() || tokens[*i] != Token::RoundOpen
{
return Err("Expected '(' to start funcbody");
}
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing funcbody parlist");
}
let pars = if tokens[*i] == Token::RoundClosed
{
*i += 1;
None
}
else
{
let ret = Some(parse_parlist(tokens, i)?);
if *i >= tokens.len() || tokens[*i] != Token::RoundClosed
{
return Err("Expected ')' to close funcbody parlist");
}
*i += 1;
ret
};
let block = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
println!("{:?}", &tokens[(*i - 10)..(*i + 10)]);
return Err("Expected 'end' to close funcbody");
}
*i += 1;
return Ok(FuncbodyNode { pars, body: block });
}
fn parse_parlist(tokens: &Vec<Token>, i: &mut usize) -> Result<ParlistNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing parlist");
}
if tokens[*i] == Token::DotDotDot
{
*i += 1;
return Ok(ParlistNode { names: Vec::new(), has_varargs: true });
}
let first_name = if let Token::Name(name) = &tokens[*i]
{
*i += 1;
name.clone()
}
else
{
return Err("Expected name to start parlist");
};
let mut names = Vec::from([first_name]);
let mut has_varargs = false;
while *i < tokens.len() && tokens[*i] == Token::Comma
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing parlist name list");
}
match &tokens[*i]
{
Token::Name(name) =>
{
*i += 1;
names.push(name.clone());
}
Token::DotDotDot =>
{
*i += 1;
has_varargs = true;
break;
}
_ => return Err("Unexpected token while parsing parlist name list"),
}
}
return Ok(ParlistNode { names, has_varargs });
}
fn parse_attnamelist(tokens: &Vec<Token>, i: &mut usize) -> Result<AttnamelistNode, &'static str>
{
let mut attnames: Vec<AttnameNode> = Vec::from([parse_attname(tokens, i)?]);
while *i < tokens.len() && tokens[*i] == Token::Comma
{
*i += 1;
attnames.push(parse_attname(tokens, i)?);
}
return Ok(AttnamelistNode { attnames });
}
fn parse_attname(tokens: &Vec<Token>, i: &mut usize) -> Result<AttnameNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of tokens but expected name for attrib name");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
let attribute = if *i < tokens.len() && tokens[*i] == Token::Lt
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens but expected attribute");
}
if let Token::Name(attrib) = &tokens[*i]
{
*i += 1;
if *i >= tokens.len() || tokens[*i] != Token::Gt
{
return Err("Expected '>' to close attribute name");
}
*i += 1;
Some(attrib.clone())
}
else
{
return Err("Expected attribute in attrib name");
}
}
else
{
None
};
return Ok(AttnameNode { name: name.clone(), attribute });
}
else
{
return Err("Expected name for attrib name");
}
}
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
#[derive(Debug, Clone, Copy)]
pub struct Node
{
}
#[derive(Debug, Clone, Copy)]
pub struct AmbiguousNode
{
}
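// Bottom-up CYK recognizer over the CNF grammar in grammar.rs:
// p[index!(l - 1, s, a)] is true iff nonterminal `a` derives the l tokens
// starting at position s. Time is O(n^3 * |rules|) and memory
// O(n^2 * |nonterminals|), which is why it was superseded by the recursive
// descent parser above; tree reconstruction (traverse_back, disambiguate)
// was never completed.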
pub fn cyk(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
let r = NONTERMINAL_NAMES.len();
let n = tokens.len();
macro_rules! index {
($x:expr, $y:expr, $z:expr) => {
($x + $y * n + ($z as usize) * n * n)
};
}
let mut p = vec![false; n * n * r];
//let mut back: Vec<Vec<(usize, u8, u8)>> = vec![Vec::new(); n * n * r];
println!("{n}, {r}, {}", p.len());
for s in 0..n
{
for (index, token) in TERMINAL_RULES
{
if let Token::Name(_) = tokens[s]
{
if let Token::Name(_) = token
{
p[index!(0, s, index)] = true
}
}
else if let Token::StringLiteral(_) = tokens[s]
{
if let Token::StringLiteral(_) = token
{
p[index!(0, s, index)] = true
}
}
else if let Token::Numeral(_) = tokens[s]
{
if let Token::Numeral(_) = token
{
p[index!(0, s, index)] = true
}
}
else if token == tokens[s]
{
p[index!(0, s, index)] = true
}
}
}
println!("Done initializing");
for l in 2..=n
{
for s in 1..=(n - l + 1)
{
for _p in 1..=(l-1)
{
for &(a, b, c) in &NONTERMINAL_RULES
{
if p[index!(_p - 1, s - 1, b)] && p[index!(l - _p - 1, s + _p - 1, c)]
{
let index = index!(l - 1, s - 1, a);
p[index] = true;
/* if !back[index].contains(&(_p, b, c))
{
back[index].push((_p, b, c));
}*/
}
}
}
}
println!("{l}");
}
let start_index = NONTERMINAL_NAMES.iter().position(|x| x == &"S_0").expect("no start index found");
if p[index!(n - 1, 0, start_index)]
{
println!("Is part of the language");
todo!()
//return Ok(disambiguate(traverse_back(back, tokens, n, 1, start_index)));
}
else
{
return Err("Input is not part of the language")
}
}
fn traverse_back(back: Vec<Vec<(usize, u8, u8)>>, tokens: Vec<Token>, l: usize, s: usize, a: usize) -> AmbiguousNode
{
todo!()
}
fn disambiguate(root: AmbiguousNode) -> Node
{
todo!()
}

src/tokenizer.rs

@@ -1,4 +1,4 @@
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Token
 {
 Name(String),
@@ -11,8 +11,7 @@ pub enum Token
 EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
 RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
 Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
-IntLiteral(String),
-HexLiteral(String),
+Numeral(String),
 StringLiteral(String),
 }
@@ -171,7 +170,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 '/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash),
 ';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon),
 '^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret),
-'0' => tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch),
+'0' => tokenize_terminal(last_index, *index, token, state, Some(Token::Numeral("0".to_string())), TokenizerState::Zero, token_str, ch),
 '"' =>
 {
 *token = None;
@@ -187,11 +186,11 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 if ch.is_whitespace() { }
 else if ch.is_ascii_alphabetic() || ch == '_'
 {
-tokenize_terminal(last_index, *index, token, state, Some(Token::Name(token_str.clone())), TokenizerState::Name, token_str, ch);
+tokenize_terminal(last_index, *index, token, state, Some(Token::Name(ch.to_string())), TokenizerState::Name, token_str, ch);
 }
 else if ch.is_numeric() && ch.is_ascii()
 {
-tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch);
+tokenize_terminal(last_index, *index, token, state, Some(Token::Numeral(ch.to_string())), TokenizerState::Number, token_str, ch);
 }
 else
 {
@@ -422,7 +421,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 {
 *last_index = *index as i32;
 token_str.push(ch);
-*token = Some(Token::IntLiteral(token_str.clone()));
+*token = Some(Token::Numeral(token_str.clone()));
 }
 else
 {
@@ -441,7 +440,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 {
 *last_index = *index as i32;
 token_str.push(ch);
-*token = Some(Token::HexLiteral(token_str.clone()));
+*token = Some(Token::Numeral(token_str.clone()));
 *state = TokenizerState::HexNumber;
 }
 else
@@ -469,7 +468,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 {
 *last_index = *index as i32;
 token_str.push(ch);
-*token = Some(Token::HexLiteral(token_str.clone()));
+*token = Some(Token::Numeral(token_str.clone()));
 }
 else
 {
@@ -494,7 +493,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind
 {
 *last_index = *index as i32;
 token_str.push(ch);
-*token = Some(Token::IntLiteral(token_str.clone()));
+*token = Some(Token::Numeral(token_str.clone()));
 }
 else
 {
@@ -1179,7 +1178,32 @@ pub fn tokenize(file_content: &String) -> Result<Vec<Token>, &'static str>
 TokenizerState::End => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::End)?,
 TokenizerState::And => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::And)?,
 TokenizerState::Semicolon => tokenize_backtrack_custom_token(&mut last_index, &mut index, &mut tokens, &mut token, &mut token_str, &mut state, Token::Semicolon)?,
-_ => todo!("state: {:?}", state),
+TokenizerState::Number =>
+{
+if let Some(numeral_token) = token
+{
+if let Token::Numeral(_) = numeral_token
+{
+tokens.push(numeral_token);
+}
+else
+{
+return Err("In number state but current token is not a numeral")
+}
+}
+else
+{
+return Err("In number state but no current token")
+}
+}
+TokenizerState::Start =>
+{
+if token.is_some()
+{
+return Err("Finished tokenizing in the start state but the token was non-empty");
+}
+}
+_ => todo!("state: {:?} {:?}", state, token),
 }
 return Ok(tokens);