diff --git a/src/main.rs b/src/main.rs index 2c1ae17..5f77163 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,13 +2,14 @@ use core::panic; use std::collections::HashMap; use std::env; use std::fs; +use std::iter::Peekable; use std::process::exit; #[derive(Debug, Clone, PartialEq)] enum Token { StringLit(String, i32, i32), - IntLit(i64, i32, i32), + IntLit(String, i32, i32), Keyword(String, i32, i32), } @@ -20,22 +21,43 @@ enum TokenizerState Comment, } -#[derive(Debug,Clone,Copy,PartialEq)] +#[derive(Debug,Clone,Copy)] enum Datatype { Int, String, - Pointer, + //Pointer, Any, } +impl PartialEq for Datatype +{ + fn eq(&self, other: &Self) -> bool + { + core::mem::discriminant(self) == core::mem::discriminant(&Datatype::Any) || + core::mem::discriminant(other) == core::mem::discriminant(&Datatype::Any) || + core::mem::discriminant(self) == core::mem::discriminant(other) + } +} + #[derive(Debug)] struct Function { name: String, ins: Vec, outs: Vec, - content: Vec + content: Vec, +} + +#[derive(Debug)] +enum Operation +{ + Enqueue(Datatype, String, i32, i32), + Dequeue(i32, i32), + Intrinsic(String, i32, i32), + FunctionCall(String, i32, i32), + If(Vec, Option>, i32, i32), + While(Vec, i32, i32), } fn main() @@ -43,309 +65,472 @@ fn main() let intrinsics: HashMap<&str, (Vec, Vec)> = HashMap::from( [ ("print", (Vec::from([Datatype::Any]), Vec::new())), - ("deq", (Vec::from([Datatype::Any]), Vec::new())), + ("-", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))), + ("req", (Vec::from([Datatype::Any]), Vec::from([Datatype::Any]))), ]); let args: Vec = env::args().collect(); if args.len() < 2 { usage() } + let mut debug = false; + for arg in &args[3..] + { + match arg.as_str() + { + "-d" | "--debug" => debug = true, + _ => panic!("Unknown option {}", arg), + } + } match args[1].as_str() { "-c" | "--compile" => { let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file"); let mut tokens: Vec = tokenize(&file_content); - println!("{:?}", tokens); - let functions: Vec = extract_functions(&mut tokens, &intrinsics); - println!("{:?}", functions); - validate(&tokens, &functions, &intrinsics); + println!("---Done tokenizing, got {} tokens---", tokens.len()); + let functions: Vec = extract_functions(&mut tokens, &intrinsics, debug); + println!("---Done extracting functions, got {} functions and reduced the token count to {}---", functions.len(), tokens.len()); + let operations = parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug); + println!("---Done parsing tokens into {} operations---", operations.len()); + validate_function_calls(&operations, &functions, debug); + println!("---Done validating function calls---"); + typecheck(&operations, &functions, &intrinsics, debug); + println!("---Done typechecking---"); } _ => panic!("Unknown option {}", args[1]) } } -fn validate(tokens: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) +fn typecheck(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) { - validate_tokens(tokens, functions, intrinsics); - println!("Validating main scope succeeded"); for function in functions { - validate_tokens(&function.content, functions, intrinsics); - println!("Validating function {} succeeded", function.name); - } - validate_queue_integrity(tokens, &Vec::new(), &Vec::new(), functions, intrinsics); - println!("Validating queue integrity for main scope succeeded"); - for function in functions - { - validate_queue_integrity(&function.content, &function.ins, &function.outs, functions, intrinsics); - println!("Validating queue integrity for function {} succeeded", function.name); - } -} - -const CONTROL_STRUCTURE_NAMES: [&'static str; 3] = ["if", "else", "while"]; - -fn validate_tokens(tokens: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) -{ - let mut depth = 0; - let mut starting_control_structure = false; - for token in tokens - { - match token + if debug { - Token::Keyword(name, line, col) => - { - match name.as_str() - { - "{" => - { - if !starting_control_structure - { - panic!("Control structure without '{{' at {}:{}", line, col); - } - depth += 1; - } - "}" => - { - depth -= 1; - if depth < 0 - { - panic!("Encountered '}}' without matching '{{' at {}:{}", line, col); - } - } - _ => - { - if !CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) && !functions.iter().any(|x| &x.name == name) && !intrinsics.contains_key(name.as_str()) - { - panic!("Unknown word {name} at {}:{}", line, col); - } - } - } - starting_control_structure = CONTROL_STRUCTURE_NAMES.contains(&name.as_str()); - } - _ => {} + println!("Now typechecking function '{}'", function.name); + } + typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, debug); + if debug + { + println!("Successfully typechecked function '{}'", function.name); } } + if debug + { + println!("Now typechecking main operations"); + } + typecheck_block(operations, &Vec::new(), &Vec::new(), functions, intrinsics, debug); + if debug + { + println!("Successfully typechecked main operations"); + } } -fn validate_queue_integrity(tokens: &Vec, ins: &Vec, outs: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) +fn typecheck_block(operations: &Vec, ins: &Vec, outs: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) { - let mut type_queue: Vec = Vec::new(); + let actual_outs = get_return_type(operations, ins, functions, intrinsics, debug); + if &actual_outs != outs + { + let (line, col) = match operations.last() + { + Some(operation) => + { + match operation + { + Operation::Enqueue(_, _, line, col) | + Operation::FunctionCall(_, line, col) | + Operation::If(_, _, line, col) | + Operation::Intrinsic(_, line, col) | + Operation::While(_, line, col) | + Operation::Dequeue(line, col) => (*line, *col), + } + } + None => (-1, -1) + }; + panic!("Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}", outs, actual_outs, line, col); + } +} + +fn get_return_type(operations: &Vec, ins: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Vec +{ + let type_queue: &mut Vec = &mut Vec::new(); type_queue.extend_from_slice(ins); - for token in tokens + let mut debug_string = String::from(""); + for operation in operations { - match token + if debug { - Token::IntLit(_, _, _) => type_queue.push(Datatype::Int), - Token::StringLit(_, _, _) => type_queue.push(Datatype::String), - Token::Keyword(name, line, col) => + debug_string = format!("operation: {:?}: {:?}", operation, type_queue); + } + match operation + { + Operation::Dequeue(line, col) => { - if intrinsics.contains_key(name.as_str()) + if type_queue.is_empty() { - for req_type in &intrinsics.get(name.as_str()).unwrap().0 + panic!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col); + } + type_queue.remove(0); + } + Operation::Enqueue(datatype, _, _, _) => + { + type_queue.push(*datatype); + } + Operation::FunctionCall(function_name, line, col) => + { + let function = functions.iter().find(|x| &x.name == function_name).unwrap(); + if function.ins.len() > type_queue.len() + { + panic!("Attempted to call function '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", function.name, line, col, function.ins, type_queue); + } + for in_type in &function.ins + { + let actual_type = type_queue.remove(0); + if in_type != &actual_type { - let found_type = type_queue.remove(0); - if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type - { - panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); - } - } - for out_type in &intrinsics.get(name.as_str()).unwrap().1 - { - type_queue.push(out_type.clone()); + panic!("Attempted to call function '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", function.name, line, col, in_type, actual_type); } } - else if let Some(function) = functions.iter().find(|x| &x.name == name) + type_queue.extend_from_slice(&function.outs); + } + Operation::If(if_block, maybe_else_block, line, col) => + { + if type_queue.is_empty() { - for req_type in &function.ins - { - let found_type = type_queue.remove(0); - if req_type != &Datatype::Any && found_type != Datatype::Any && req_type != &found_type - { - panic!("Expected {:?} but got {:?} for {} at {}:{}", req_type, found_type, name, line, col); - } - } - for out_type in &function.outs - { - type_queue.push(out_type.clone()); - } + panic!("Encountered if block with an empty queue at {}:{}", line, col); } - else if CONTROL_STRUCTURE_NAMES.contains(&name.as_str()) + let comparison_type = type_queue.remove(0); + if comparison_type != Datatype::Int { - match name.as_str() + panic!("Expected an int as an if condition but got {:?} instead at {}:{}", comparison_type, line, col); + } + if debug + { + println!("Starting to typecheck if block"); + } + let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, debug); + let else_ret = + if let Some(else_block) = maybe_else_block + { + if debug { - "if" => - { - let found_type = type_queue.remove(0) ; - if found_type != Datatype::Any && found_type != Datatype::Int - { - panic!("Wrong type {:?} for if at {}:{}", found_type, line, col); - } - } - _ => todo!() + println!("Starting to typecheck else block"); } + get_return_type(else_block, &type_queue, functions, intrinsics, debug) } else { - panic!("Unrecognized keyword {} at {}:{}", name, line, col); + type_queue.clone() + }; + if if_ret != else_ret + { + panic!("Incompatible queue states after if/else construction, expected {:?} but got {:?}", if_ret, else_ret); } + type_queue.clear(); + type_queue.extend_from_slice(&if_ret); + } + Operation::Intrinsic(intrinsic_name, line, col) => + { + let io = intrinsics.get(intrinsic_name.as_str()).unwrap(); + if io.0.len() > type_queue.len() + { + panic!("Attempted to call intrinsic '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", intrinsic_name, line, col, io.0, type_queue); + } + for in_type in &io.0 + { + let actual_type = type_queue.remove(0); + if in_type != &actual_type + { + panic!("Attempted to call intrinsic '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", intrinsic_name, line, col, in_type, actual_type); + } + } + type_queue.extend_from_slice(&io.1); + } + Operation::While(while_block, line, col) => + { + if type_queue.is_empty() + { + panic!("Encountered while block with an empty queue at {}:{}", line, col); + } + let &comparison_type = type_queue.get(0).unwrap(); + if comparison_type != Datatype::Int + { + panic!("Expected an int as a while condition but got {:?} instead at {}:{}", comparison_type, line, col); + } + if debug + { + println!("Starting to typecheck while block"); + } + typecheck_block(while_block, type_queue, type_queue, functions, intrinsics, debug); + } + } + if debug + { + println!("{} => {:?}", debug_string, type_queue); + } + } + return type_queue.clone(); +} + +fn validate_function_calls(operations: &Vec, functions: &Vec, debug: bool) +{ + for function in functions + { + validate_function_calls_in_block(&function.content, functions, debug); + if debug + { + println!("Successfully validated function calls in function '{}'", function.name); + } + } + validate_function_calls_in_block(operations, functions, debug); + if debug + { + println!("Successfully validated function calls in main operations"); + } +} + +fn validate_function_calls_in_block(block: &Vec, functions: &Vec, debug: bool) +{ + for operation in block + { + match operation + { + Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) => {}, + Operation::FunctionCall(function_name, line, col) => + { + if !functions.iter().any(|x| &x.name == function_name) + { + panic!("Call to unknown function {} at {}:{}", function_name, line, col); + } + } + Operation::If(if_block, maybe_else_block, _, _) => + { + validate_function_calls_in_block(if_block, functions, debug); + if let Some(else_block) = maybe_else_block + { + validate_function_calls_in_block(else_block, functions, debug); + } + } + Operation::While(while_block, _, _) => + { + validate_function_calls_in_block(while_block, functions, debug); } } } } -fn extract_functions(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>) -> Vec +fn extract_functions(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Vec { + let mut tokens_iter = tokens.iter().peekable(); let mut functions: Vec = Vec::new(); - let mut state = FunctionExtractionState::Outside; - let mut ins: Vec = Vec::new(); - let mut outs: Vec = Vec::new(); - let mut function_name = String::from(""); - let mut content: Vec = Vec::new(); - let mut indices_to_remove: Vec = Vec::new(); - let mut depth = 0; - for (i, token) in tokens.iter().enumerate() + let mut new_tokens: Vec = Vec::new(); + while let Some(token) = tokens_iter.next() { - match state + if let Token::Keyword(word, line, col) = token { - FunctionExtractionState::Outside => + if word == "function" { - if let Token::Keyword(name, _, _) = token + if debug { - if name == &String::from("function") - { - state = FunctionExtractionState::Ins; - } + print!("Found a function at {}:{}", line, col); } - } - FunctionExtractionState::Ins => - { - match token + let mut ins: Vec = Vec::new(); + loop { - Token::Keyword(name, line, col) => + let maybe_token = tokens_iter.next(); + match maybe_token { - match name.as_str() + Some(token) => { - "int" => ins.push(Datatype::Int), - "str" => ins.push(Datatype::String), - "ptr" => ins.push(Datatype::Pointer), - "any" => ins.push(Datatype::Any), - "=>" => state = FunctionExtractionState::Outs, - _ => panic!("Unknown datatype '{}' at {}:{}", name, line, col) - } - }, - Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col), - } - } - FunctionExtractionState::Outs => - { - match token - { - Token::Keyword(name, _, _) => - { - match name.as_str() - { - "int" => outs.push(Datatype::Int), - "str" => outs.push(Datatype::String), - "ptr" => outs.push(Datatype::Pointer), - "any" => outs.push(Datatype::Any), - _ => + match token { - if let Token::Keyword(name, _, _) = token + Token::IntLit(_, line, col) | Token::StringLit(_, line, col) => { - if functions.iter().any(|x| &x.name == name) - { - panic!("A function with name {} already exists", name); - } - if intrinsics.contains_key(name.as_str()) - { - panic!("Function names cannot have the name of intrinsics: {}", name); - } - function_name = name.clone(); + panic!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col); } - else + Token::Keyword(word, line, col) => { - panic!("Expected a function name") // TODO: Add location + if word == "=>" + { + break; + } + match word.as_str() + { + "any" => ins.push(Datatype::Any), + "str" => ins.push(Datatype::String), + "int" => ins.push(Datatype::Int), + _ => panic!("Expected input parameters for a function but got {} instead at {}:{}", word, line, col) + } } - state =FunctionExtractionState::OpenCurly; } } - }, - Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col), - } - } - FunctionExtractionState::OpenCurly => - { - if let Token::Keyword(name, line, col) = token - { - if name == "{" - { - depth += 1; - state = FunctionExtractionState::Body - } - else - { - panic!("Expected '{{' to open the function's body at {}:{}", line, col) + None => panic!("Unexpected end of file while extracting a function") } } - else + if debug { - panic!("Expected '{{' to open the function's body") // TODO: Add location + println!("ins: {:?}", ins); } - } - FunctionExtractionState::Body => - { - if let Token::Keyword(name, _, _) = token + let mut outs: Vec = Vec::new(); + loop { - match name.as_str() + let maybe_token = tokens_iter.next(); + match maybe_token { - "{" => + Some(token) => { - depth += 1; - } - "}" => - { - depth -= 1; - if depth == 0 + match token { - state = FunctionExtractionState::Outside; - functions.push(Function { name: function_name.clone(), ins: ins.clone() , outs: outs.clone(), content: content.clone()}); - function_name.clear(); - ins.clear(); - outs.clear(); - content.clear(); - indices_to_remove.push(i); - continue; + Token::IntLit(_, line, col) | Token::StringLit(_, line, col) => + { + panic!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col); + } + Token::Keyword(word, line, col) => + { + match word.as_str() + { + "any" => outs.push(Datatype::Any), + "str" => outs.push(Datatype::String), + "int" => outs.push(Datatype::Int), + "{" | "}" => panic!("Expected function name but got {} at {}:{}", word, line, col), + _ => + { + if functions.iter().any(|x| &x.name == word) + { + panic!("Redeclaration of function '{}' at {}:{}", word, line, col); + } + if debug + { + println!("outs: {:?}", outs); + } + let block = parse_block(&mut tokens_iter, intrinsics, debug); + functions.push(Function {name: word.clone(), ins, outs, content: block}); + break; + } + } + } } } - _ => {} + None => panic!("Unexpected end of file while extracting a function") } } - content.push(token.clone()); + } + else + { + new_tokens.push(token.clone()); } } - - if state != FunctionExtractionState::Outside + else { - indices_to_remove.push(i); + new_tokens.push(token.clone()); } } - indices_to_remove.reverse(); - for i in indices_to_remove - { - tokens.remove(i); - } + tokens.clear(); + tokens.extend_from_slice(&new_tokens); return functions; } -#[derive(Debug, PartialEq)] -enum FunctionExtractionState +fn parse_block(tokens_iter: &mut Peekable>, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Vec { - Outside, - Ins, - Outs, - OpenCurly, - Body, + if let Some(Token::Keyword(word, line, col)) = tokens_iter.next() + { + if word != "{" + { + panic!("Expected '{{' to open a block but got {} at {}:{}", word, line, col); + } + } + else + { + panic!("Expected '{{' to open a block"); + } + return parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug); +} + +fn parse_until_delimiter(tokens_iter: &mut Peekable>, intrinsics: &HashMap<&str, (Vec, Vec)>, delimiter: Option<&str>, debug: bool) -> Vec +{ + let mut operations: Vec = Vec::new(); + loop + { + let maybe_token = tokens_iter.next(); + match maybe_token + { + Some(token) => + { + match token + { + Token::IntLit(value, line, col) => + { + operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col)); + } + Token::StringLit(value, line, col) => + { + operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col)); + } + Token::Keyword(word, line, col) => + { + if intrinsics.contains_key(word.as_str()) + { + operations.push(Operation::Intrinsic(word.clone(), *line, *col)); + } + else if word == "if" + { + let block = parse_block(tokens_iter, intrinsics, debug); + let else_block = + if let Some(Token::Keyword(maybe_else, _, _)) = tokens_iter.peek() + { + if maybe_else == "else" + { + tokens_iter.next(); + Some(parse_block(tokens_iter, intrinsics, debug)) + } + else + { + None + } + } + else + { + None + }; + operations.push(Operation::If(block, else_block, *line, *col)); + } + else if word == "while" + { + operations.push(Operation::While(parse_block(tokens_iter, intrinsics, debug), *line, *col)); + } + else if word == "deq" + { + operations.push(Operation::Dequeue(*line, *col)); + } + else if Some(word.as_str()) == delimiter + { + return operations; + } + else if word == "{" || word == "function" + { + panic!("Unexpected keyword {} at {}:{}", word, line, col); + } + else + { + operations.push(Operation::FunctionCall(word.clone(), *line, *col)); + } + } + } + } + None => + { + if delimiter.is_some() + { + panic!("Reached the end of the file while parsing a block") + } + else + { + return operations; + } + } + } + } } fn usage() @@ -414,9 +599,9 @@ fn tokenize(text: &str) -> Vec if ch.is_whitespace() { state = TokenizerState::Whitespace; - if let Ok(number) = word.parse::() + if let Ok(_) = word.parse::() { - tokens.push(Token::IntLit(number, line, col)); + tokens.push(Token::IntLit(word.clone(), line, col)); } else { diff --git a/test.qbl b/test.qbl index 2e2b3e0..e3af7ef 100644 --- a/test.qbl +++ b/test.qbl @@ -22,4 +22,15 @@ function int => str check { "False" } -} \ No newline at end of file +} + +function int => whileFunction +{ + while + { + 1 - "test" req print + } + deq +} + +42 whileFunction \ No newline at end of file