luaaaaah/src/parser.rs

592 lines
14 KiB
Rust

use crate::tokenizer::Token;
use crate::grammar::{NONTERMINAL_NAMES, NONTERMINAL_RULES, TERMINAL_RULES};
/// Parses a complete token stream into a [`ChunkNode`].
///
/// Takes ownership of `tokens` and forwards them to `own`. Whatever `own` returns, the
/// tree or the static error message, is returned unchanged.
pub fn parse(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
    own(tokens)
}
/// Holds the owned token vector while it is parsed by reference.
///
/// Starts `parse_chunk` with the cursor at index 0 and returns its result as is.
fn own(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
    let mut cursor = 0;
    parse_chunk(&tokens, &mut cursor)
}
/// Root of the syntax tree returned by `parse`: a chunk, which wraps exactly one block.
#[derive(Debug)]
pub struct ChunkNode
{
/// The block that `parse_chunk` obtained from `parse_block`.
block: BlockNode
}
/// A sequence of statements, optionally terminated by a return statement.
#[derive(Debug)]
pub struct BlockNode
{
/// Statements in source order, one entry per successful `parse_stat` call.
stats: Vec<StatNode>,
/// Present when the statements are followed by a `Token::Return`.
retstat: Option<RetstatNode>
}
/// One statement, as produced by `parse_stat`.
#[derive(Debug)]
pub enum StatNode
{
/// A lone `Token::Semicolon` (empty statement).
Semicolon,
/// Assignment of the expression list `rhs` to the variable list `lhs`.
Assignment { lhs: VarlistNode, rhs: ExplistNode },
/// A function call used as a statement.
Functioncall(FunctioncallNode),
/// A label declaration: a name enclosed in two `Token::ColonColon` tokens.
Label(String),
/// A `Token::Break` statement.
Break,
/// A `Token::Goto` followed by the name of the target label.
Goto(String),
/// A block delimited by `Token::Do` and `Token::End`.
Do(BlockNode),
/// `while condition do body end`.
While { condition: ExpNode, body: BlockNode },
/// `repeat body until condition`; the condition is parsed after the body.
Repeat { condition: ExpNode, body: BlockNode },
/// `if condition then body`, followed by any number of `elseif` branches and an
/// optional `else` block.
If { condition: ExpNode, body: BlockNode, elseifs: Vec<ElseifNode>, else_: Option<BlockNode> },
/// `for var = start, end [, change] do body end`; `change` is the optional third
/// expression.
ForEq { var: String, start: ExpNode, end: ExpNode, change: Option<ExpNode>, body: BlockNode },
/// `for vars in exps do body end`.
ForIn { vars: Vec<String>, exps: ExplistNode, body: BlockNode },
/// `function name body`.
Function { name: FuncnameNode, body: FuncbodyNode },
/// `local function name body`.
LocalFunction { name: String, body: FuncbodyNode },
/// `local attnames [= values]`.
Local { attnames: AttnamelistNode, values: Option<ExplistNode> }
}
/// A return statement; the type `parse_retstat` is declared to return.
#[derive(Debug)]
pub struct RetstatNode
{
/// The expression list attached to the return statement.
values: ExplistNode
}
/// An expression. Placeholder: the struct has no fields yet.
#[derive(Debug)]
pub struct ExpNode
{
}
/// A list of expressions, e.g. the right-hand side of an assignment.
#[derive(Debug)]
pub struct ExplistNode
{
/// The expressions of the list.
exps: Vec<ExpNode>
}
/// A list of variables; used as the left-hand side of `StatNode::Assignment`.
#[derive(Debug)]
pub struct VarlistNode
{
/// The assignment targets, in the order they were parsed.
vars: Vec<VarNode>
}
/// A function call.
///
/// NOTE(review): `SuffixexpNode` is referenced here but is not declared in the visible
/// part of this file.
#[derive(Debug)]
pub struct FunctioncallNode
{
/// The expression that is being called.
function: SuffixexpNode,
/// Presumably the method name of a `value:name(args)` call, `None` for a plain call;
/// TODO confirm once the suffix-expression parser exists.
object_arg: Option<String>,
/// The call arguments.
args: ArgsNode,
}
/// The arguments of a function call. Placeholder: the struct has no fields yet.
#[derive(Debug)]
pub struct ArgsNode
{
}
/// One `elseif condition then body` branch of a `StatNode::If`.
#[derive(Debug)]
pub struct ElseifNode
{
/// The expression parsed after `Token::Elseif`.
condition: ExpNode,
/// The block parsed after the `Token::Then` that follows the condition.
body: BlockNode,
}
/// The name in a `function` statement; the return type of `parse_funcname`.
/// Placeholder: the struct has no fields yet.
#[derive(Debug)]
pub struct FuncnameNode
{
}#[derive(Debug)]
/// The body of a function; the return type of `parse_funcbody`.
/// Placeholder: the struct has no fields yet.
pub struct FuncbodyNode
{
}
/// The name list of a `local` declaration; the return type of `parse_attnamelist`.
/// Placeholder: the struct has no fields yet.
#[derive(Debug)]
pub struct AttnamelistNode
{
}
/// A variable that can appear in a `VarlistNode`.
#[derive(Debug)]
pub enum VarNode
{
/// A plain name.
Name(String),
/// Presumably `value[index]` (TODO confirm once the variable parser exists).
Indexed { value: PrefixexpNode, index: ExpNode },
/// Presumably `value.name` (TODO confirm once the variable parser exists).
Member { value: PrefixexpNode, name: String }
}
/// The prefix expression that `VarNode::Indexed` and `VarNode::Member` apply to.
///
/// Placeholder: the enum has no variants yet, so no value of this type (and therefore
/// neither of those two `VarNode` variants) can be constructed at the moment.
#[derive(Debug)]
pub enum PrefixexpNode
{
}
/// Parses the whole token stream as one chunk.
///
/// A chunk is the outermost block. `i` is the cursor into `tokens`; on success it equals
/// `tokens.len()`.
///
/// # Errors
///
/// Returns the error of `parse_block`, or an error when tokens remain after the block.
/// Without that check, input following the top-level block (for example anything after a
/// return statement) would be dropped silently.
fn parse_chunk(tokens: &Vec<Token>, i: &mut usize) -> Result<ChunkNode, &'static str>
{
    let block = parse_block(tokens, i)?;
    if *i < tokens.len()
    {
        return Err("Unexpected token after end of chunk");
    }
    Ok(ChunkNode { block })
}
/// Parses a block: zero or more statements followed by an optional return statement.
///
/// Statements are collected until the end of the input or until a token that cannot start
/// a statement because it closes the block: `return`, `end`, `else`, `elseif` or `until`.
/// The closing token itself is NOT consumed (except for `return`, which is handed to
/// `parse_retstat`); callers check and consume the terminator they expect.
///
/// # Errors
///
/// Propagates the first error reported by `parse_stat` or `parse_retstat`.
fn parse_block(tokens: &Vec<Token>, i: &mut usize) -> Result<BlockNode, &'static str>
{
    let mut stats: Vec<StatNode> = Vec::new();
    while let Some(token) = tokens.get(*i)
    {
        // Stopping only at `return` would feed `end`/`else`/`elseif`/`until` to
        // `parse_stat`, which rejects them, so no nested block could ever be parsed.
        if matches!(token, Token::Return | Token::End | Token::Else | Token::Elseif | Token::Until)
        {
            break;
        }
        stats.push(parse_stat(tokens, i)?);
    }
    let retstat = if tokens.get(*i) == Some(&Token::Return)
    {
        Some(parse_retstat(tokens, i)?)
    }
    else
    {
        None
    };
    Ok(BlockNode { stats, retstat })
}
fn parse_stat(tokens: &Vec<Token>, i: &mut usize) -> Result<StatNode, &'static str>
{
if *i >= tokens.len()
{
return Err("Reached end of file while parsing stat");
}
match tokens[*i]
{
Token::Semicolon =>
{
*i += 1;
Ok(StatNode::Semicolon)
}
Token::Break =>
{
*i += 1;
Ok(StatNode::Break)
}
Token::Goto =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of stream but expected name for goto");
}
return if let Token::Name(name) = &tokens[*i]
{
*i += 1;
Ok(StatNode::Goto(name.clone()))
}
else
{
Err("Expecting name for goto")
};
}
Token::Do =>
{
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Missing 'end' for do block");
}
*i += 1;
return Ok(StatNode::Do(body));
}
Token::While =>
{
*i += 1;
let condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after while condition")
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Missing 'end' for do block");
}
*i += 1;
return Ok(StatNode::While { condition, body });
}
Token::Repeat =>
{
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Until
{
return Err("Expected 'until' after repeat body");
}
*i += 1;
return Ok(StatNode::Repeat { condition: parse_exp(tokens, i)?, body });
}
Token::If =>
{
*i += 1;
let condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Then
{
return Err("Expected 'then' after if condition");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing if");
}
let mut elseifs: Vec<ElseifNode> = Vec::new();
while tokens[*i] == Token::Elseif
{
*i += 1;
let elseif_condition = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Then
{
return Err("Expected 'then' after elseif condition");
}
*i += 1;
elseifs.push(ElseifNode { condition: elseif_condition, body: parse_block(tokens, i)? });
}
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing if");
}
let else_ = if tokens[*i] == Token::Else
{
*i += 1;
Some(parse_block(tokens, i)?)
}
else
{
None
};
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' to close if");
}
*i += 1;
return Ok(StatNode::If { condition, body, elseifs, else_ });
}
Token::For =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing for");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing for after first name");
}
match tokens[*i]
{
Token::Equals =>
{
*i += 1;
let start = parse_exp(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Comma
{
return Err("Expected ',' after 'for eq' start value");
}
*i += 1;
let end = parse_exp(tokens, i)?;
if *i >= tokens.len()
{
return Err("Reached end of tokens after end value in 'for eq'");
}
let change = if tokens[*i] == Token::Comma
{
*i += 1;
Some(parse_exp(tokens, i)?)
}
else
{
None
};
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after 'for eq' head");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' to close 'for eq'");
}
return Ok(StatNode::ForEq { var: name.clone(), start, end, change, body });
}
Token::Comma =>
{
let mut names = Vec::from([name.clone()]);
while tokens[*i] == Token::Comma
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing 'for in' namelist");
}
if let Token::Name(next_name) = &tokens[*i]
{
names.push(next_name.clone());
}
else
{
return Err("Expected another name in 'for in' namelist");
}
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing 'for in' namelist after name");
}
}
if tokens[*i] != Token::In
{
return Err("Expected 'in' after 'for in' namelist");
}
*i += 1;
let exps = parse_explist(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::Do
{
return Err("Expected 'do' after 'for in' explist");
}
*i += 1;
let body = parse_block(tokens, i)?;
if *i >= tokens.len() || tokens[*i] != Token::End
{
return Err("Expected 'end' after 'for in' body");
}
*i += 1;
return Ok(StatNode::ForIn { vars: names, exps, body });
}
_ => Err("Unexpected token after first name in for")
}
}
else
{
return Err("Expected name after 'for'");
}
}
Token::Function =>
{
*i += 1;
let funcname = parse_funcname(tokens, i)?;
return Ok(StatNode::Function { name: funcname, body: parse_funcbody(tokens, i)? });
}
Token::Local =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing local");
}
if tokens[*i] == Token::Function
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing local function");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
return Ok(StatNode::LocalFunction { name: name.clone(), body: parse_funcbody(tokens, i)? });
}
else
{
return Err("Expected local function name");
}
}
let attnames = parse_attnamelist(tokens, i)?;
let initials = if *i < tokens.len() && tokens[*i] == Token::Equals
{
*i += 1;
Some(parse_explist(tokens, i)?)
}
else
{
None
};
return Ok(StatNode::Local { attnames, values: initials });
}
Token::ColonColon =>
{
*i += 1;
if *i >= tokens.len()
{
return Err("Reached end of tokens while parsing label");
}
if let Token::Name(name) = &tokens[*i]
{
*i += 1;
if *i >= tokens.len() || tokens[*i] != Token::ColonColon
{
return Err("Expected '::' after name in label declaration");
}
*i += 1;
return Ok(StatNode::Label(name.clone()));
}
else
{
return Err("Expected a name after '::' in label declaration")
}
}
Token::Name(_) | Token::RoundOpen =>
{
// assignment or functioncall
let suffix_expression = parse_suffixexp(tokens, i)?;
match tokens[*i]
{
Token::Equals =>
{
*i += 1;
return Ok(StatNode::Assignment { lhs: VarlistNode { vars: Vec::from([suffix_expression_to_var(suffix_expression)]) }, rhs: parse_explist(tokens, i)? });
}
Token::Comma =>
{
let mut vars = Vec::from([suffix_expression_to_var(suffix_expression)]);
while tokens[*i] == Token::Comma
{
*i += 1;
vars.push(parse_var(tokens, i)?);
}
if *i >= tokens.len() || tokens[*i] != Token::Equals
{
return Err("Expected '=' for assignment");
}
*i += 1;
return Ok(StatNode::Assignment { lhs: VarlistNode { vars }, rhs: parse_explist(tokens, i)? });
}
_ =>
{
if let SuffixexpNode()
}
}
}
_ => Err("Unexpected token while parsing stat")
}
}
/// Intended to turn a parsed suffix expression into the `VarNode` it denotes; `parse_stat`
/// uses it for the first target of an assignment.
///
/// Not implemented yet: every call panics via `todo!()`.
fn suffix_expression_to_var(suffixexp: SuffixexpNode) -> VarNode
{
todo!()
}
/// Intended to parse one variable at `tokens[*i]`; `parse_stat` calls it for every
/// assignment target that follows a comma.
///
/// Not implemented yet: every call panics via `todo!()`.
fn parse_var(tokens: &Vec<Token>, i: &mut usize) -> Result<VarNode, &'static str>
{
todo!()
}
/// Intended to parse the arguments of a function call into an `ArgsNode`.
///
/// Not implemented yet: every call panics via `todo!()`.
fn parse_args(tokens: &Vec<Token>, i: &mut usize) -> Result<ArgsNode, &'static str>
{
todo!()
}
/// Intended to parse a suffix expression; `parse_stat` calls it for statements that start
/// with a name or an opening round bracket.
///
/// Not implemented yet: every call panics via `todo!()`.
fn parse_suffixexp(tokens: &Vec<Token>, i: &mut usize) -> Result<SuffixexpNode, &'static str>
{
// Planned grammar rule:
// primaryexp { '.' 'Name' | '[' exp ']' | ':' 'Name' args | args }
todo!()
}
/// Intended to parse a return statement; `parse_block` calls it with the cursor on
/// `Token::Return`.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_retstat(tokens: &Vec<Token>, i: &mut usize) -> Result<RetstatNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
/// Intended to parse one expression starting at `tokens[*i]`.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_exp(tokens: &Vec<Token>, i: &mut usize) -> Result<ExpNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
/// Intended to parse an expression list starting at `tokens[*i]`.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_explist(tokens: &Vec<Token>, i: &mut usize) -> Result<ExplistNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
/// Intended to parse the name of a `function` statement; `parse_stat` calls it right
/// after consuming `Token::Function`.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_funcname(tokens: &Vec<Token>, i: &mut usize) -> Result<FuncnameNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
/// Intended to parse a function body; `parse_stat` calls it after the name of a
/// `function` or `local function` statement.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_funcbody(tokens: &Vec<Token>, i: &mut usize) -> Result<FuncbodyNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
/// Intended to parse the name list of a `local` declaration; `parse_stat` calls it when
/// `local` is not followed by `function`.
///
/// Not implemented yet: panics via `todo!` with the `Debug` form of the current token.
/// If `*i` is past the end of `tokens`, the indexing itself panics first.
fn parse_attnamelist(tokens: &Vec<Token>, i: &mut usize) -> Result<AttnamelistNode, &'static str>
{
todo!("{:?}", tokens[*i])
}
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
//===============================================================================================================================================
/// Result type of `disambiguate`. Placeholder: the struct has no fields yet.
#[derive(Debug, Clone, Copy)]
pub struct Node
{
}
/// Result type of `traverse_back` and input of `disambiguate`. Placeholder: the struct
/// has no fields yet.
#[derive(Debug, Clone, Copy)]
pub struct AmbiguousNode
{
}
/// Runs a CYK recognition pass over `tokens` using the grammar tables from
/// `crate::grammar`.
///
/// The chart `p` is a flat `Vec<bool>` addressed through the local `index!` macro as
/// `index!(span_length - 1, start, nonterminal)`: an entry is `true` when the nonterminal
/// derives the `span_length` tokens beginning at `start`.
/// * `TERMINAL_RULES` entries `(nonterminal, token)` fill the spans of length 1. Names,
///   string literals and numerals match by variant only, every other token by equality.
/// * `NONTERMINAL_RULES` entries `(a, b, c)` mark `a` for a span that can be split into a
///   left part derived by `b` and a right part derived by `c`.
///
/// Progress information is printed to stdout while the chart is filled.
///
/// # Errors
///
/// Returns an error when the start symbol `S_0` does not derive the whole input. An empty
/// token stream is reported the same way, because the chart has no entry for a span of
/// length zero.
///
/// # Panics
///
/// Panics when `NONTERMINAL_NAMES` contains no `S_0`. Building the tree for accepted
/// input is not implemented yet, so the success path currently ends in `todo!()`.
pub fn cyk(tokens: Vec<Token>) -> Result<ChunkNode, &'static str>
{
    let r = NONTERMINAL_NAMES.len();
    let n = tokens.len();
    if n == 0
    {
        // Without this guard the final lookup computes `n - 1` and underflows.
        return Err("Input is not part of the language");
    }
    macro_rules! index {
        ($x:expr, $y:expr, $z:expr) => {
            ($x + $y * n + ($z as usize) * n * n)
        };
    }
    let mut p = vec![false; n * n * r];
    // TODO: a parallel `back` table of `(split, b, c)` entries per chart cell is needed to
    // reconstruct the parse tree; see `traverse_back`.
    println!("{n}, {r}, {}", p.len());
    for s in 0..n
    {
        for (index, token) in TERMINAL_RULES
        {
            let derives_token = match (&tokens[s], &token)
            {
                (Token::Name(_), rule) => matches!(rule, Token::Name(_)),
                (Token::StringLiteral(_), rule) => matches!(rule, Token::StringLiteral(_)),
                (Token::Numeral(_), rule) => matches!(rule, Token::Numeral(_)),
                (actual, rule) => rule == actual,
            };
            if derives_token
            {
                p[index!(0, s, index)] = true;
            }
        }
    }
    println!("Done initializing");
    // `l` is the span length, `s` the 1-based start of the span and `split` the length of
    // its left part.
    for l in 2..=n
    {
        for s in 1..=(n - l + 1)
        {
            for split in 1..=(l - 1)
            {
                for &(a, b, c) in &NONTERMINAL_RULES
                {
                    if p[index!(split - 1, s - 1, b)] && p[index!(l - split - 1, s + split - 1, c)]
                    {
                        p[index!(l - 1, s - 1, a)] = true;
                    }
                }
            }
        }
        println!("{l}");
    }
    let start_index = NONTERMINAL_NAMES.iter().position(|x| x == &"S_0").expect("no start index found");
    if !p[index!(n - 1, 0, start_index)]
    {
        return Err("Input is not part of the language");
    }
    println!("Is part of the language");
    // TODO: return Ok(disambiguate(traverse_back(back, tokens, n, 1, start_index)))
    // once the back-pointer table and both helpers exist.
    todo!()
}
/// Intended to rebuild a (possibly ambiguous) parse tree from the back-pointer table of
/// `cyk`, for the span of length `l` starting at `s` and derived by nonterminal `a`.
/// NOTE(review): parameter meanings are taken from the commented-out call in `cyk`;
/// confirm when implementing.
///
/// Not implemented yet: every call panics via `todo!()`.
fn traverse_back(back: Vec<Vec<(usize, u8, u8)>>, tokens: Vec<Token>, l: usize, s: usize, a: usize) -> AmbiguousNode
{
todo!()
}
/// Intended to reduce the ambiguous tree produced by `traverse_back` to a single `Node`.
///
/// Not implemented yet: every call panics via `todo!()`.
fn disambiguate(root: AmbiguousNode) -> Node
{
todo!()
}