//! Tokenizer, parser, type checker and interpreter for the queue-based "kurz" language.
//!
//! A program is a flat stream of whitespace-separated tokens. Literals are enqueued,
//! `deq` drops the front of the queue, `req` moves the front to the back, and
//! intrinsics / functions consume their inputs from the front and enqueue their outputs.

use core::panic;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::iter::Peekable;
use std::process::exit;

/// A lexical token: `(text, line, col)`.
///
/// The position is the one the tokenizer was at when the token was completed, i.e. the
/// character that terminated the token (or the end of input), not the token's first character.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    StringLit(String, i32, i32),
    IntLit(String, i32, i32),
    Keyword(String, i32, i32),
}

/// What the tokenizer is currently reading.
enum TokenizerState {
    Whitespace,
    Quote,
    Keyword,
    Comment,
}

/// Static type of a queue element.
#[derive(Debug, Clone, Copy)]
enum Datatype {
    Int,
    String,
    //Pointer,
    Any,
}

impl PartialEq for Datatype {
    /// `Any` compares equal to every type; otherwise the variants must match.
    ///
    /// Note that this makes equality non-transitive, so it is only suitable for
    /// "is this type acceptable here" checks.
    fn eq(&self, other: &Self) -> bool {
        matches!(self, Datatype::Any)
            || matches!(other, Datatype::Any)
            || core::mem::discriminant(self) == core::mem::discriminant(other)
    }
}

/// A user-defined function: its name, input/output signature and parsed body.
#[derive(Debug)]
struct Function {
    name: String,
    ins: Vec<Datatype>,
    outs: Vec<Datatype>,
    content: Vec<Operation>,
}

/// A parsed operation. The trailing `i32, i32` of every variant is the source `line, col`.
#[derive(Debug)]
enum Operation {
    Enqueue(Datatype, String, i32, i32),
    Dequeue(i32, i32),
    Requeue(i32, i32),
    Intrinsic(String, i32, i32),
    FunctionCall(String, i32, i32),
    If(Vec<Operation>, Option<Vec<Operation>>, i32, i32),
    While(Vec<Operation>, i32, i32),
}

/// Returns the table of built-in operations: name -> (input types, output types).
fn builtin_intrinsics() -> HashMap<&'static str, (Vec<Datatype>, Vec<Datatype>)> {
    HashMap::from([
        ("print", (Vec::from([Datatype::Any]), Vec::new())),
        (
            "-",
            (
                Vec::from([Datatype::Int, Datatype::Int]),
                Vec::from([Datatype::Int]),
            ),
        ),
    ])
}

/// Entry point: `kurz -c path/to/file [-i|--interpret] [-d|--debug]`.
///
/// Runs tokenizing, function extraction, parsing, call validation and type checking,
/// and interprets the program when `-i` is given.
fn main() {
    let intrinsics = builtin_intrinsics();
    let args: Vec<String> = env::args().collect();
    // Both the mode flag (args[1]) and the source path (args[2]) are required; checking
    // for fewer than 3 arguments keeps `args[2]` and `args[3..]` below in bounds.
    if args.len() < 3 {
        usage()
    }
    let mut debug = false;
    let mut interpret = false;
    for arg in &args[3..] {
        match arg.as_str() {
            "-d" | "--debug" => debug = true,
            "-i" | "--interpret" => interpret = true,
            _ => panic!("Unknown option {}", arg),
        }
    }
    match args[1].as_str() {
        "-c" | "--compile" => {
            let file_content =
                fs::read_to_string(&args[2]).expect("Could not read the source file");
            let mut tokens: Vec<Token> = tokenize(&file_content);
            println!("---Done tokenizing, got {} tokens---", tokens.len());
            let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics, debug);
            println!(
                "---Done extracting functions, got {} functions and reduced the token count to {}---",
                functions.len(),
                tokens.len()
            );
            let operations =
                parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug);
            println!("---Done parsing tokens into {} operations---", operations.len());
            validate_function_calls(&operations, &functions, debug);
            println!("---Done validating function calls---");
            typecheck(&operations, &functions, &intrinsics, debug);
            println!("---Done typechecking---");
            if interpret {
                println!("---Starting to interpret the program---\n\n");
                interpret_program(&operations, &mut Vec::new(), &functions, &intrinsics, debug);
            }
        }
        _ => panic!("Unknown option {}", args[1]),
    }
}

/// Parses a runtime queue value as an integer operand of the `-` intrinsic.
fn parse_int_operand(value: &str, line: i32, col: i32) -> i64 {
    value.parse::<i64>().unwrap_or_else(|_| {
        panic!(
            "Expected an int operand for '-' but got '{}' at {}:{}",
            value, line, col
        )
    })
}

/// Executes `operations` against `queue`, recursing into blocks and called functions.
///
/// The program is expected to have passed `validate_function_calls` and `typecheck`;
/// removing from an empty queue panics.
fn interpret_program(operations: &Vec<Operation>, queue: &mut Vec<String>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) {
    for operation in operations {
        if debug {
            println!("before: {:?}: {:?}", operation, queue);
        }
        match operation {
            Operation::Dequeue(_, _) => {
                queue.remove(0);
            }
            Operation::Enqueue(_, value, _, _) => {
                queue.push(value.clone());
            }
            Operation::Requeue(_, _) => {
                let val = queue.remove(0);
                queue.push(val);
            }
            Operation::FunctionCall(function_name, line, col) => {
                let function = functions
                    .iter()
                    .find(|x| &x.name == function_name)
                    .unwrap_or_else(|| {
                        panic!("Call to unknown function {} at {}:{}", function_name, line, col)
                    });
                interpret_program(&function.content, queue, functions, intrinsics, debug);
            }
            Operation::If(if_block, maybe_else_block, _, _) => {
                let val = queue.remove(0);
                // TODO: Add bool type
                // NOTE(review): the if-block runs when the value IS "0", whereas `while`
                // stops on "0". This looks like a deliberate "difference is zero" test
                // (e.g. `a b - if { ... }`) -- confirm before changing.
                if val == "0" {
                    interpret_program(if_block, queue, functions, intrinsics, debug);
                } else if let Some(else_block) = maybe_else_block {
                    interpret_program(else_block, queue, functions, intrinsics, debug);
                }
            }
            Operation::Intrinsic(intrinsic_name, line, col) => match intrinsic_name.as_str() {
                "print" => {
                    print!("{}", queue.remove(0));
                }
                "-" => {
                    let minuend = parse_int_operand(&queue.remove(0), *line, *col);
                    let subtrahend = parse_int_operand(&queue.remove(0), *line, *col);
                    let difference = minuend.checked_sub(subtrahend).unwrap_or_else(|| {
                        panic!("Integer overflow in '-' at {}:{}", line, col)
                    });
                    queue.push(difference.to_string());
                }
                _ => {
                    panic!("Unexpected intrinsic '{}' at {}:{}", intrinsic_name, line, col);
                }
            },
            Operation::While(while_block, _, _) => loop {
                // The condition is peeked, not consumed: the loop ends once the front is "0".
                let front = queue
                    .first()
                    .expect("while condition requires a non-empty queue");
                if front == "0" {
                    break;
                }
                interpret_program(while_block, queue, functions, intrinsics, debug);
            },
        }
        if debug {
            println!("after: {:?}: {:?}", operation, queue);
        }
    }
}

/// Type-checks every function body against its signature, then the main operations,
/// which must start and end with an empty queue. Panics on the first type error.
fn typecheck(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) {
    for function in functions {
        if debug {
            println!("Now typechecking function '{}'", function.name);
        }
        typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, debug);
        if debug {
            println!("Successfully typechecked function '{}'", function.name);
        }
    }
    if debug {
        println!("Now typechecking main operations");
    }
    typecheck_block(operations, &Vec::new(), &Vec::new(), functions, intrinsics, debug);
    if debug {
        println!("Successfully typechecked main operations");
    }
}

/// Returns the source position stored in an operation.
fn operation_position(operation: &Operation) -> (i32, i32) {
    match operation {
        Operation::Enqueue(_, _, line, col)
        | Operation::Requeue(line, col)
        | Operation::FunctionCall(_, line, col)
        | Operation::If(_, _, line, col)
        | Operation::Intrinsic(_, line, col)
        | Operation::While(_, line, col)
        | Operation::Dequeue(line, col) => (*line, *col),
    }
}

/// Checks that running `operations` on a queue typed `ins` leaves a queue typed `outs`.
///
/// Panics otherwise, reporting the position of the block's last operation
/// (`-1:-1` for an empty block).
fn typecheck_block(operations: &Vec<Operation>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) {
    let actual_outs = get_return_type(operations, ins, functions, intrinsics, debug);
    if &actual_outs != outs {
        let (line, col) = operations.last().map_or((-1, -1), operation_position);
        panic!(
            "Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}",
            outs, actual_outs, line, col
        );
    }
}

/// Removes `expected.len()` types from the front of `type_queue`, panicking when the queue
/// is too short or a type does not match. `kind` is "function" or "intrinsic" and only
/// affects the error text.
fn consume_inputs(type_queue: &mut Vec<Datatype>, expected: &[Datatype], kind: &str, name: &str, line: i32, col: i32) {
    if expected.len() > type_queue.len() {
        panic!(
            "Attempted to call {} '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}",
            kind, name, line, col, expected, type_queue
        );
    }
    for in_type in expected {
        let actual_type = type_queue.remove(0);
        if in_type != &actual_type {
            panic!(
                "Attempted to call {} '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}",
                kind, name, line, col, in_type, actual_type
            );
        }
    }
}

/// Simulates `operations` on a queue of types starting as `ins` and returns the final queue.
///
/// Panics on the first type error: empty-queue access, wrong argument types, a non-int
/// condition, diverging if/else branches, or a while body that changes the queue's types.
fn get_return_type(operations: &Vec<Operation>, ins: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Datatype> {
    let mut type_queue: Vec<Datatype> = ins.clone();
    let mut debug_string = String::new();
    for operation in operations {
        if debug {
            debug_string = format!("operation: {:?}: {:?}", operation, type_queue);
        }
        match operation {
            Operation::Dequeue(line, col) => {
                if type_queue.is_empty() {
                    panic!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col);
                }
                type_queue.remove(0);
            }
            Operation::Enqueue(datatype, _, _, _) => {
                type_queue.push(*datatype);
            }
            Operation::Requeue(line, col) => {
                if type_queue.is_empty() {
                    panic!("Attempted to requeue an element while the queue was empty at {}:{}", line, col);
                }
                let typ = type_queue.remove(0);
                type_queue.push(typ);
            }
            Operation::FunctionCall(function_name, line, col) => {
                let function = functions
                    .iter()
                    .find(|x| &x.name == function_name)
                    .unwrap_or_else(|| {
                        panic!("Call to unknown function {} at {}:{}", function_name, line, col)
                    });
                consume_inputs(&mut type_queue, &function.ins, "function", &function.name, *line, *col);
                type_queue.extend_from_slice(&function.outs);
            }
            Operation::If(if_block, maybe_else_block, line, col) => {
                if type_queue.is_empty() {
                    panic!("Encountered if block with an empty queue at {}:{}", line, col);
                }
                // The condition is consumed before either branch runs.
                let comparison_type = type_queue.remove(0);
                if comparison_type != Datatype::Int {
                    panic!(
                        "Expected an int as an if condition but got {:?} instead at {}:{}",
                        comparison_type, line, col
                    );
                }
                if debug {
                    println!("Starting to typecheck if block");
                }
                let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, debug);
                let else_ret = match maybe_else_block {
                    Some(else_block) => {
                        if debug {
                            println!("Starting to typecheck else block");
                        }
                        get_return_type(else_block, &type_queue, functions, intrinsics, debug)
                    }
                    // Without an else branch the queue is left as it was.
                    None => type_queue.clone(),
                };
                if if_ret != else_ret {
                    panic!(
                        "Incompatible queue states after if/else construction, expected {:?} but got {:?} at {}:{}",
                        if_ret, else_ret, line, col
                    );
                }
                type_queue = if_ret;
            }
            Operation::Intrinsic(intrinsic_name, line, col) => {
                let io = intrinsics.get(intrinsic_name.as_str()).unwrap_or_else(|| {
                    panic!("Unexpected intrinsic '{}' at {}:{}", intrinsic_name, line, col)
                });
                consume_inputs(&mut type_queue, &io.0, "intrinsic", intrinsic_name, *line, *col);
                type_queue.extend_from_slice(&io.1);
            }
            Operation::While(while_block, line, col) => {
                // The condition is only peeked, so the body must leave the queue's types
                // unchanged for the next iteration's check.
                let comparison_type = match type_queue.first() {
                    Some(front) => *front,
                    None => panic!("Encountered while block with an empty queue at {}:{}", line, col),
                };
                if comparison_type != Datatype::Int {
                    panic!(
                        "Expected an int as a while condition but got {:?} instead at {}:{}",
                        comparison_type, line, col
                    );
                }
                if debug {
                    println!("Starting to typecheck while block");
                }
                typecheck_block(while_block, &type_queue, &type_queue, functions, intrinsics, debug);
            }
        }
        if debug {
            println!("{} => {:?}", debug_string, type_queue);
        }
    }
    type_queue
}

/// Panics if any function body or the main operations call a function that is not defined.
fn validate_function_calls(operations: &Vec<Operation>, functions: &Vec<Function>, debug: bool) {
    for function in functions {
        validate_function_calls_in_block(&function.content, functions, debug);
        if debug {
            println!("Successfully validated function calls in function '{}'", function.name);
        }
    }
    validate_function_calls_in_block(operations, functions, debug);
    if debug {
        println!("Successfully validated function calls in main operations");
    }
}

/// Recursively checks one block (including nested if/else/while blocks) for unknown calls.
fn validate_function_calls_in_block(block: &Vec<Operation>, functions: &Vec<Function>, debug: bool) {
    for operation in block {
        match operation {
            Operation::Intrinsic(_, _, _)
            | Operation::Enqueue(_, _, _, _)
            | Operation::Dequeue(_, _)
            | Operation::Requeue(_, _) => {}
            Operation::FunctionCall(function_name, line, col) => {
                if !functions.iter().any(|x| &x.name == function_name) {
                    panic!("Call to unknown function {} at {}:{}", function_name, line, col);
                }
            }
            Operation::If(if_block, maybe_else_block, _, _) => {
                validate_function_calls_in_block(if_block, functions, debug);
                if let Some(else_block) = maybe_else_block {
                    validate_function_calls_in_block(else_block, functions, debug);
                }
            }
            Operation::While(while_block, _, _) => {
                validate_function_calls_in_block(while_block, functions, debug);
            }
        }
    }
}

/// Returns the source position stored in a token.
fn token_position(token: &Token) -> (i32, i32) {
    match token {
        Token::StringLit(_, line, col)
        | Token::IntLit(_, line, col)
        | Token::Keyword(_, line, col) => (*line, *col),
    }
}

/// Maps a type keyword (`any`, `str`, `int`) to its `Datatype`.
fn parse_datatype(word: &str) -> Option<Datatype> {
    match word {
        "any" => Some(Datatype::Any),
        "str" => Some(Datatype::String),
        "int" => Some(Datatype::Int),
        _ => None,
    }
}

/// Reads the input types of a function declaration up to and including the `=>` keyword.
fn parse_function_inputs(tokens_iter: &mut Peekable<std::slice::Iter<Token>>) -> Vec<Datatype> {
    let mut ins: Vec<Datatype> = Vec::new();
    loop {
        match tokens_iter.next() {
            Some(Token::Keyword(word, line, col)) => {
                if word == "=>" {
                    return ins;
                }
                match parse_datatype(word) {
                    Some(datatype) => ins.push(datatype),
                    None => panic!(
                        "Expected input parameters for a function but got {} instead at {}:{}",
                        word, line, col
                    ),
                }
            }
            Some(token) => {
                let (line, col) = token_position(token);
                panic!(
                    "Expected input parameters for a function but got {:?} instead at {}:{}",
                    token, line, col
                );
            }
            None => panic!("Unexpected end of file while extracting a function"),
        }
    }
}

/// Reads the output types of a function declaration followed by the function name.
/// Returns `(outs, name, line, col)` where the position is the name token's.
fn parse_function_outputs_and_name(tokens_iter: &mut Peekable<std::slice::Iter<Token>>) -> (Vec<Datatype>, String, i32, i32) {
    let mut outs: Vec<Datatype> = Vec::new();
    loop {
        match tokens_iter.next() {
            Some(Token::Keyword(word, line, col)) => match word.as_str() {
                // Words with a fixed meaning to the parser can never be called, so they
                // are rejected as function names.
                "{" | "}" | "deq" | "req" | "if" | "else" | "while" | "function" | "=>" => {
                    panic!("Expected function name but got {} at {}:{}", word, line, col)
                }
                other => match parse_datatype(other) {
                    Some(datatype) => outs.push(datatype),
                    None => return (outs, word.clone(), *line, *col),
                },
            },
            Some(token) => {
                let (line, col) = token_position(token);
                panic!(
                    "Expected output parameters or a name for a function but got {:?} instead at {}:{}",
                    token, line, col
                );
            }
            None => panic!("Unexpected end of file while extracting a function"),
        }
    }
}

/// Removes all `function <ins> => <outs> <name> { ... }` declarations from `tokens` and
/// returns them parsed. On return `tokens` holds only the tokens outside declarations.
///
/// Panics on malformed declarations, redeclared names and names that clash with intrinsics.
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Function> {
    let mut tokens_iter = tokens.iter().peekable();
    let mut functions: Vec<Function> = Vec::new();
    let mut new_tokens: Vec<Token> = Vec::new();
    while let Some(token) = tokens_iter.next() {
        let (line, col) = match token {
            Token::Keyword(word, line, col) if word == "function" => (line, col),
            _ => {
                new_tokens.push(token.clone());
                continue;
            }
        };
        if debug {
            print!("Found a function at {}:{}", line, col);
        }
        let ins = parse_function_inputs(&mut tokens_iter);
        if debug {
            println!("ins: {:?}", ins);
        }
        let (outs, name, name_line, name_col) = parse_function_outputs_and_name(&mut tokens_iter);
        if functions.iter().any(|x| x.name == name) {
            panic!("Redeclaration of function '{}' at {}:{}", name, name_line, name_col);
        }
        if intrinsics.contains_key(name.as_str()) {
            panic!("Function name {} at {}:{} is already an intrinsic", name, name_line, name_col);
        }
        if debug {
            println!("outs: {:?}", outs);
        }
        let content = parse_block(&mut tokens_iter, intrinsics, debug);
        functions.push(Function { name, ins, outs, content });
    }
    *tokens = new_tokens;
    functions
}

/// Parses a `{ ... }` block: consumes the opening brace, then everything up to the
/// matching closing brace.
fn parse_block(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Vec<Operation> {
    match tokens_iter.next() {
        Some(Token::Keyword(word, line, col)) => {
            if word != "{" {
                panic!("Expected '{{' to open a block but got {} at {}:{}", word, line, col);
            }
        }
        _ => panic!("Expected '{{' to open a block"),
    }
    parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug)
}

/// Parses tokens into operations until `delimiter` is found (consuming it), or until the
/// end of input when `delimiter` is `None`.
///
/// Panics if the input ends before an expected delimiter or on a misplaced structural keyword.
fn parse_until_delimiter(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, delimiter: Option<&str>, debug: bool) -> Vec<Operation> {
    let mut operations: Vec<Operation> = Vec::new();
    loop {
        let token = match tokens_iter.next() {
            Some(token) => token,
            None => {
                if delimiter.is_some() {
                    panic!("Reached the end of the file while parsing a block")
                }
                return operations;
            }
        };
        match token {
            Token::IntLit(value, line, col) => {
                operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col));
            }
            Token::StringLit(value, line, col) => {
                operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col));
            }
            Token::Keyword(word, line, col) => {
                let keyword = word.as_str();
                if intrinsics.contains_key(keyword) {
                    operations.push(Operation::Intrinsic(word.clone(), *line, *col));
                } else if keyword == "if" {
                    let block = parse_block(tokens_iter, intrinsics, debug);
                    // An `else` directly after the if-block attaches to it.
                    let has_else = match tokens_iter.peek() {
                        Some(Token::Keyword(maybe_else, _, _)) => maybe_else == "else",
                        _ => false,
                    };
                    let else_block = if has_else {
                        tokens_iter.next();
                        Some(parse_block(tokens_iter, intrinsics, debug))
                    } else {
                        None
                    };
                    operations.push(Operation::If(block, else_block, *line, *col));
                } else if keyword == "while" {
                    let block = parse_block(tokens_iter, intrinsics, debug);
                    operations.push(Operation::While(block, *line, *col));
                } else if keyword == "deq" {
                    operations.push(Operation::Dequeue(*line, *col));
                } else if keyword == "req" {
                    operations.push(Operation::Requeue(*line, *col));
                } else if Some(keyword) == delimiter {
                    return operations;
                } else if matches!(keyword, "{" | "}" | "function" | "else" | "=>") {
                    // Structural keywords in the wrong place must not be mistaken for
                    // function calls.
                    panic!("Unexpected keyword {} at {}:{}", word, line, col);
                } else {
                    operations.push(Operation::FunctionCall(word.clone(), *line, *col));
                }
            }
        }
    }
}

/// Prints the command line usage and exits the process.
fn usage() {
    println!("Usage: kurz -c path/to/file");
    exit(0);
}

/// Turns a finished word into an `IntLit` when it parses as an integer, else a `Keyword`.
fn classify_word(word: &str, line: i32, col: i32) -> Token {
    if word.parse::<i64>().is_ok() {
        Token::IntLit(word.to_string(), line, col)
    } else {
        Token::Keyword(word.to_string(), line, col)
    }
}

/// Splits `text` into tokens.
///
/// * Words are separated by whitespace; a word that parses as an integer becomes an
///   `IntLit`, any other word a `Keyword`.
/// * `"..."` is a string literal (no escape sequences).
/// * `//` starts a comment that runs to the end of the line, except inside a string literal.
///
/// Panics on an unterminated string literal and on a `"` in the middle of a word.
fn tokenize(text: &str) -> Vec<Token> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut iter = text.chars().peekable();
    while let Some(ch) = iter.next() {
        let starts_comment = ch == '/' && iter.peek() == Some(&'/');
        // A `//` inside a string literal is part of the string, not a comment.
        if starts_comment && !matches!(state, TokenizerState::Quote) {
            // A comment directly after a word ends that word; flush it so it does not
            // get merged with the first word after the comment.
            if matches!(state, TokenizerState::Keyword) {
                tokens.push(classify_word(&word, line, col));
                word.clear();
            }
            state = TokenizerState::Comment;
        }
        match state {
            TokenizerState::Comment => {
                if ch == '\n' {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace => {
                if ch == '"' {
                    state = TokenizerState::Quote;
                } else if !ch.is_whitespace() {
                    state = TokenizerState::Keyword;
                    word.push(ch);
                }
            }
            TokenizerState::Quote => {
                if ch == '"' {
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.clone(), line, col));
                    word.clear();
                } else {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword => {
                if ch.is_whitespace() {
                    state = TokenizerState::Whitespace;
                    tokens.push(classify_word(&word, line, col));
                    word.clear();
                } else if ch == '"' {
                    panic!("Having '\"' in the middle of a word is not allowed");
                } else {
                    word.push(ch);
                }
            }
        }
        col += 1;
        if ch == '\n' {
            col = 1;
            line += 1;
        }
    }
    match state {
        TokenizerState::Quote => {
            panic!("Encountered EOF before closing string");
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {}
        // A word pending at the end of input is classified like any other word, so a
        // trailing integer literal without a final newline is still an IntLit.
        TokenizerState::Keyword => {
            tokens.push(classify_word(&word, line, col));
        }
    }
    tokens
}