// `core` is named explicitly so the `use core::panic` import below resolves
// regardless of the edition the file is built with.
extern crate core;

use core::panic;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::iter::Peekable;
use std::process::exit;

/// Integer type used for integer literals and the integer intrinsics.
type IntValue = i64;

/// Table of built-in operations: name -> (consumed types, produced types).
type Intrinsics<'a> = HashMap<&'a str, (Vec<Datatype>, Vec<Datatype>)>;

/// Cursor over the token stream that is shared by the parsing functions.
type TokenIter<'a> = Peekable<std::slice::Iter<'a, Token>>;

/// Words the parser treats specially and that therefore cannot name a function.
const RESERVED_WORDS: &[&str] = &[
    "{", "}", "deq", "req", "dup", "swp", "true", "false", "depth", "???", "if", "else", "while",
    "function", "=>",
];

/// A lexical token together with the line and column recorded by the tokenizer.
///
/// The recorded position is the one of the character that *terminated* the
/// token (or the end of the input), not the position of its first character.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    StringLit(String, i32, i32),
    IntLit(String, i32, i32),
    BoolLit(String, i32, i32),
    Keyword(String, i32, i32),
}

impl Token {
    /// Returns the `(line, column)` stored in the token.
    fn position(&self) -> (i32, i32) {
        match self {
            Token::StringLit(_, line, col)
            | Token::IntLit(_, line, col)
            | Token::BoolLit(_, line, col)
            | Token::Keyword(_, line, col) => (*line, *col),
        }
    }
}

/// States of the hand-written tokenizer in [`tokenize`].
enum TokenizerState {
    Whitespace,
    Quote,
    Keyword,
    Comment,
}

/// Static types tracked by the typechecker.
#[derive(Debug, Clone, Copy)]
enum Datatype {
    Int,
    String,
    Bool,
    //Pointer,
    Any,
}

impl PartialEq for Datatype {
    /// `Any` compares equal to every type; otherwise the variants must match.
    ///
    /// Because of the wildcard this relation is deliberately not transitive.
    fn eq(&self, other: &Self) -> bool {
        matches!(self, Datatype::Any)
            || matches!(other, Datatype::Any)
            || std::mem::discriminant(self) == std::mem::discriminant(other)
    }
}

/// A user-defined function: its name, input/output types and parsed body.
#[derive(Debug)]
struct Function {
    name: String,
    ins: Vec<Datatype>,
    outs: Vec<Datatype>,
    content: Vec<Operation>,
}

/// A parsed operation. The trailing two `i32` fields are always line and column.
#[derive(Debug)]
enum Operation {
    Enqueue(Datatype, String, i32, i32),
    Dequeue(i32, i32),
    // TODO: req can be implemented in terms of dup and dequeue
    Requeue(i32, i32),
    Swap(i32, i32),
    Dup(i32, i32),
    Intrinsic(String, i32, i32),
    FunctionCall(String, i32, i32),
    If(Vec<Operation>, Option<Vec<Operation>>, i32, i32),
    While(Vec<Operation>, i32, i32),
    Depth(i32, i32),
    QueueDiagnostic(i32, i32),
}

impl Operation {
    /// Returns the `(line, column)` stored in the operation.
    fn position(&self) -> (i32, i32) {
        match self {
            Operation::Enqueue(_, _, line, col)
            | Operation::If(_, _, line, col)
            | Operation::Intrinsic(_, line, col)
            | Operation::FunctionCall(_, line, col)
            | Operation::While(_, line, col)
            | Operation::Dequeue(line, col)
            | Operation::Requeue(line, col)
            | Operation::Swap(line, col)
            | Operation::Dup(line, col)
            | Operation::Depth(line, col)
            | Operation::QueueDiagnostic(line, col) => (*line, *col),
        }
    }
}

/// Builds the table of intrinsics with the types they consume and produce.
fn build_intrinsics() -> Intrinsics<'static> {
    HashMap::from([
        ("print", (vec![Datatype::Any], vec![])),
        ("println", (vec![Datatype::Any], vec![])),
        ("-", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Int])),
        ("+", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Int])),
        ("<", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Bool])),
        (">", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Bool])),
        ("==", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Bool])),
        ("!=", (vec![Datatype::Int, Datatype::Int], vec![Datatype::Bool])),
        ("decrease", (vec![Datatype::Int], vec![Datatype::Int])),
    ])
}

/// Command line entry point.
///
/// Expected arguments: a mode (`-c`/`--compile` with a file, or `-t`/`--test`
/// with a directory), the path, then optional `-d`/`--debug` and
/// `-i`/`--interpret` flags.
fn main() {
    let intrinsics = build_intrinsics();
    let args: Vec<String> = env::args().collect();
    // Both the mode and the path are required; with fewer arguments the
    // `args[2]` / `args[3..]` accesses below would be out of range.
    if args.len() < 3 {
        usage()
    }
    let mut debug = false;
    let mut interpret = false;
    for arg in &args[3..] {
        match arg.as_str() {
            "-d" | "--debug" => debug = true,
            "-i" | "--interpret" => interpret = true,
            _ => panic!("Unknown option {}", arg),
        }
    }
    match args[1].as_str() {
        "-t" | "--test" => {
            let entries = match fs::read_dir(&args[2]) {
                Ok(entries) => entries,
                Err(err) => {
                    println!("ERROR: Could not read the directory '{}': {}", args[2], err);
                    return;
                }
            };
            for entry in entries {
                let path = match entry {
                    Ok(entry) => entry.path(),
                    Err(err) => {
                        println!("ERROR: Could not read a directory entry: {}", err);
                        continue;
                    }
                };
                println!("========NOW TESTING '{:?}'========", path);
                // One unreadable entry (e.g. a sub-directory) must not abort the whole run.
                let file_content = match fs::read_to_string(&path) {
                    Ok(content) => content,
                    Err(err) => {
                        println!("ERROR: Could not read '{:?}': {}", path, err);
                        continue;
                    }
                };
                match compile(file_content, &intrinsics, interpret, debug) {
                    Ok(()) => println!("\n\n\n---Successfully parsed '{:?}'---", path),
                    Err(msg) => println!("ERROR: {}", msg),
                }
            }
        }
        "-c" | "--compile" => {
            let file_content = match fs::read_to_string(&args[2]) {
                Ok(content) => content,
                Err(err) => {
                    println!("ERROR: Could not read the source file '{}': {}", args[2], err);
                    return;
                }
            };
            match compile(file_content, &intrinsics, interpret, debug) {
                Ok(()) => println!("\n\n\n---Successfully parsed '{}'---", args[2]),
                Err(msg) => println!("ERROR: {}", msg),
            }
        }
        _ => panic!("Unknown option {}", args[1]),
    }
}

/// Runs the full pipeline on `file_content`: tokenize, extract functions,
/// parse, validate function calls, typecheck and, if `interpret` is set,
/// interpret the program.
///
/// # Errors
/// Returns the message of the first stage that rejects the program.
fn compile(
    file_content: String,
    intrinsics: &Intrinsics,
    interpret: bool,
    debug: bool,
) -> Result<(), String> {
    let mut tokens: Vec<Token> = tokenize(&file_content)?;
    println!("---Done tokenizing, got {} tokens---", tokens.len());
    let functions: Vec<Function> = extract_functions(&mut tokens, intrinsics, debug)?;
    println!(
        "---Done extracting functions, got {} functions and reduced the token count to {}---",
        functions.len(),
        tokens.len()
    );
    let operations = parse_until_delimiter(&mut tokens.iter().peekable(), intrinsics, None, debug)?;
    println!("---Done parsing tokens into {} operations---", operations.len());
    validate_function_calls(&operations, &functions, debug)?;
    println!("---Done validating function calls---");
    typecheck(&operations, &functions, intrinsics, debug)?;
    println!("---Done typechecking---");
    if interpret {
        println!("---Starting to interpret the program---\n\n");
        interpret_program(&operations, &mut Vec::new(), &functions, intrinsics, debug);
    }
    Ok(())
}

/// Removes the front of the queue and parses it as an integer.
///
/// Panics if the queue is empty or the value is not an integer; for
/// typechecked programs this can only happen when an `any` value that is not
/// an integer reaches an integer intrinsic.
fn pop_int(queue: &mut Vec<String>) -> IntValue {
    queue
        .remove(0)
        .parse::<IntValue>()
        .expect("expected an integer at the front of the queue")
}

/// Pops two integers (front first), applies `op` and enqueues the result.
fn binary_int_intrinsic(queue: &mut Vec<String>, op: impl Fn(IntValue, IntValue) -> String) {
    let first = pop_int(queue);
    let second = pop_int(queue);
    queue.push(op(first, second));
}

/// Executes the intrinsic `name` against `queue`.
///
/// Panics on a name that is not a known intrinsic; the parser only creates
/// `Operation::Intrinsic` for names found in the intrinsics table.
fn run_intrinsic(name: &str, line: i32, col: i32, queue: &mut Vec<String>) {
    match name {
        "print" => print!("{}", queue.remove(0)),
        "println" => println!("{}", queue.remove(0)),
        "-" => binary_int_intrinsic(queue, |minuend, subtrahend| (minuend - subtrahend).to_string()),
        "+" => binary_int_intrinsic(queue, |a, b| (a + b).to_string()),
        ">" => binary_int_intrinsic(queue, |a, b| (a > b).to_string()),
        "<" => binary_int_intrinsic(queue, |a, b| (a < b).to_string()),
        "==" => binary_int_intrinsic(queue, |a, b| (a == b).to_string()),
        "!=" => binary_int_intrinsic(queue, |a, b| (a != b).to_string()),
        "decrease" => {
            let val = pop_int(queue);
            queue.push((val - 1).to_string());
        }
        _ => panic!("Unexpected intrinsic '{}' at {}:{}", name, line, col),
    }
}

/// Interprets `operations` against `queue` (front = index 0, back = end).
///
/// The program is expected to have passed [`validate_function_calls`] and
/// [`typecheck`]; queue underflow or an unknown function panics here.
/// `intrinsics` is only forwarded to nested blocks. With `debug` set the queue
/// is printed before and after every operation.
fn interpret_program(
    operations: &[Operation],
    queue: &mut Vec<String>,
    functions: &[Function],
    intrinsics: &Intrinsics,
    debug: bool,
) {
    for operation in operations {
        if debug {
            println!("before: {:?}: {:?}", operation, queue);
        }
        match operation {
            Operation::Dequeue(_, _) => {
                queue.remove(0);
            }
            Operation::Enqueue(_, value, _, _) => queue.push(value.clone()),
            Operation::Requeue(_, _) => {
                let val = queue.remove(0);
                queue.push(val);
            }
            Operation::Dup(_, _) => {
                let val = queue[0].clone();
                queue.push(val);
            }
            Operation::Swap(_, _) => {
                let first = queue.remove(0);
                let second = queue.remove(0);
                queue.push(second);
                queue.push(first);
            }
            Operation::FunctionCall(function_name, _, _) => {
                let function = functions
                    .iter()
                    .find(|candidate| &candidate.name == function_name)
                    .expect("function calls are validated before interpretation");
                // A function runs on its own queue that starts with its inputs;
                // whatever it leaves behind is appended to the caller's queue.
                let mut function_context: Vec<String> =
                    queue.drain(..function.ins.len()).collect();
                interpret_program(
                    &function.content,
                    &mut function_context,
                    functions,
                    intrinsics,
                    debug,
                );
                queue.extend(function_context);
            }
            Operation::If(if_block, maybe_else_block, _, _) => {
                let val = queue.remove(0);
                if val == "true" {
                    interpret_program(if_block, queue, functions, intrinsics, debug);
                } else if let Some(else_block) = maybe_else_block {
                    interpret_program(else_block, queue, functions, intrinsics, debug);
                }
            }
            Operation::Intrinsic(intrinsic_name, line, col) => {
                run_intrinsic(intrinsic_name, *line, *col, queue);
            }
            Operation::While(while_block, _, _) => loop {
                // The condition is consumed before every iteration.
                let val = queue.remove(0);
                if val == "false" {
                    break;
                }
                interpret_program(while_block, queue, functions, intrinsics, debug);
            },
            Operation::Depth(_, _) => {
                let depth = queue.len();
                queue.push(depth.to_string());
            }
            Operation::QueueDiagnostic(line, col) => {
                println!(
                    "---Queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------",
                    line,
                    col,
                    queue.len(),
                    queue
                );
            }
        }
        if debug {
            println!("after: {:?}: {:?}", operation, queue);
        }
    }
}

/// Typechecks every function body against its signature and the main
/// operations against an empty start and end queue.
///
/// # Errors
/// Returns the first type error found.
fn typecheck(
    operations: &[Operation],
    functions: &[Function],
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<(), String> {
    for function in functions {
        if debug {
            println!("Now typechecking function '{}'", function.name);
        }
        typecheck_block(
            &function.content,
            &function.ins,
            &function.outs,
            functions,
            intrinsics,
            debug,
        )?;
        if debug {
            println!("Successfully typechecked function '{}'", function.name);
        }
    }
    if debug {
        println!("Now typechecking main operations");
    }
    typecheck_block(operations, &[], &[], functions, intrinsics, debug)?;
    if debug {
        println!("Successfully typechecked main operations");
    }
    Ok(())
}

/// Checks that running `operations` on a queue of types `ins` ends with `outs`.
///
/// # Errors
/// Returns an error when the block itself is ill-typed or when the final
/// queue differs from `outs`. The reported position is the one of the last
/// operation; an empty block has none and is reported as `-1:-1`.
fn typecheck_block(
    operations: &[Operation],
    ins: &[Datatype],
    outs: &[Datatype],
    functions: &[Function],
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<(), String> {
    let actual_outs = get_return_type(operations, ins, functions, intrinsics, debug)?;
    if actual_outs.as_slice() != outs {
        let (line, col) = operations
            .last()
            .map(Operation::position)
            .unwrap_or((-1, -1));
        return Err(format!(
            "Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}",
            outs, actual_outs, line, col
        ));
    }
    Ok(())
}

/// Simulates `operations` on a queue of types that starts as `ins` and returns
/// the resulting queue of types.
///
/// Function calls must already have been validated: an unknown function name
/// panics here.
///
/// # Errors
/// Returns an error for queue underflow, wrongly typed arguments, non-Bool
/// `if`/`while` conditions and `if`/`else` branches that disagree.
fn get_return_type(
    operations: &[Operation],
    ins: &[Datatype],
    functions: &[Function],
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<Vec<Datatype>, String> {
    let mut type_queue: Vec<Datatype> = ins.to_vec();
    let mut debug_string = String::new();
    for operation in operations {
        if debug {
            debug_string = format!("operation: {:?}: {:?}", operation, type_queue);
        }
        match operation {
            Operation::Dequeue(line, col) => {
                if type_queue.is_empty() {
                    return Err(format!(
                        "Attempted to dequeue an element while the queue was empty at {}:{}",
                        line, col
                    ));
                }
                type_queue.remove(0);
            }
            Operation::Enqueue(datatype, _, _, _) => type_queue.push(*datatype),
            Operation::Dup(line, col) => match type_queue.first().copied() {
                Some(typ) => type_queue.push(typ),
                None => {
                    return Err(format!(
                        "Attempted to dup an element while the queue was empty at {}:{}",
                        line, col
                    ));
                }
            },
            Operation::Requeue(line, col) => {
                if type_queue.is_empty() {
                    return Err(format!(
                        "Attempted to requeue an element while the queue was empty at {}:{}",
                        line, col
                    ));
                }
                let typ = type_queue.remove(0);
                type_queue.push(typ);
            }
            Operation::Swap(line, col) => {
                if type_queue.is_empty() {
                    return Err(format!(
                        "Attempted to get the first element for a swap while the queue was empty at {}:{}",
                        line, col
                    ));
                }
                let first_typ = type_queue.remove(0);
                if type_queue.is_empty() {
                    return Err(format!(
                        "Attempted to get the second element for a swap while the queue was empty at {}:{}",
                        line, col
                    ));
                }
                let second_typ = type_queue.remove(0);
                type_queue.push(second_typ);
                type_queue.push(first_typ);
            }
            Operation::FunctionCall(function_name, line, col) => {
                let function = functions
                    .iter()
                    .find(|candidate| &candidate.name == function_name)
                    .expect("function calls are validated before typechecking");
                if function.ins.len() > type_queue.len() {
                    return Err(format!(
                        "Attempted to call function '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}",
                        function.name, line, col, function.ins, type_queue
                    ));
                }
                for in_type in &function.ins {
                    let actual_type = type_queue.remove(0);
                    if in_type != &actual_type {
                        return Err(format!(
                            "Attempted to call function '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}",
                            function.name, line, col, in_type, actual_type
                        ));
                    }
                }
                type_queue.extend_from_slice(&function.outs);
            }
            Operation::If(if_block, maybe_else_block, line, col) => {
                if type_queue.is_empty() {
                    return Err(format!(
                        "Encountered if block with an empty queue at {}:{}",
                        line, col
                    ));
                }
                let comparison_type = type_queue.remove(0);
                if comparison_type != Datatype::Bool {
                    return Err(format!(
                        "Expected a Bool as an if condition but got {:?} instead at {}:{}",
                        comparison_type, line, col
                    ));
                }
                if debug {
                    println!("Starting to typecheck if block");
                }
                let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, debug)?;
                // Without an else block the "else" path leaves the queue untouched.
                let else_ret = match maybe_else_block {
                    Some(else_block) => {
                        if debug {
                            println!("Starting to typecheck else block");
                        }
                        get_return_type(else_block, &type_queue, functions, intrinsics, debug)?
                    }
                    None => type_queue.clone(),
                };
                if if_ret != else_ret {
                    return Err(format!(
                        "Incompatible queue states after if/else construction, expected {:?} but got {:?} at {}:{}",
                        if_ret, else_ret, line, col
                    ));
                }
                type_queue = if_ret;
            }
            Operation::Intrinsic(intrinsic_name, line, col) => {
                let io = intrinsics
                    .get(intrinsic_name.as_str())
                    .expect("the parser only emits intrinsics that are in the table");
                if io.0.len() > type_queue.len() {
                    return Err(format!(
                        "Attempted to call intrinsic '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}",
                        intrinsic_name, line, col, io.0, type_queue
                    ));
                }
                for in_type in &io.0 {
                    let actual_type = type_queue.remove(0);
                    if in_type != &actual_type {
                        return Err(format!(
                            "Attempted to call intrinsic '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}",
                            intrinsic_name, line, col, in_type, actual_type
                        ));
                    }
                }
                type_queue.extend_from_slice(&io.1);
            }
            Operation::While(while_block, line, col) => {
                if type_queue.is_empty() {
                    return Err(format!(
                        "Encountered while block with an empty queue at {}:{}",
                        line, col
                    ));
                }
                let comparison_type = type_queue.remove(0);
                if comparison_type != Datatype::Bool {
                    return Err(format!(
                        "Expected a Bool as a while condition but got {:?} instead at {}:{}",
                        comparison_type, line, col
                    ));
                }
                if debug {
                    println!("Starting to typecheck while block");
                }
                // The body must restore the queue and put the next condition at the front.
                let mut outs = type_queue.clone();
                outs.insert(0, Datatype::Bool);
                typecheck_block(while_block, &type_queue, &outs, functions, intrinsics, debug)?;
            }
            Operation::Depth(_, _) => type_queue.push(Datatype::Int),
            Operation::QueueDiagnostic(line, col) => {
                println!(
                    "---Type queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------",
                    line,
                    col,
                    type_queue.len(),
                    type_queue
                );
            }
        }
        if debug {
            println!("{} => {:?}", debug_string, type_queue);
        }
    }
    Ok(type_queue)
}

/// Checks that every function call in the function bodies and in the main
/// operations refers to a declared function.
///
/// # Errors
/// Returns an error naming the first unknown function and its position.
fn validate_function_calls(
    operations: &[Operation],
    functions: &[Function],
    debug: bool,
) -> Result<(), String> {
    for function in functions {
        validate_function_calls_in_block(&function.content, functions, debug)?;
        if debug {
            println!(
                "Successfully validated function calls in function '{}'",
                function.name
            );
        }
    }
    validate_function_calls_in_block(operations, functions, debug)?;
    if debug {
        println!("Successfully validated function calls in main operations");
    }
    Ok(())
}

/// Recursively validates the function calls of one block, descending into
/// `if`, `else` and `while` bodies.
fn validate_function_calls_in_block(
    block: &[Operation],
    functions: &[Function],
    debug: bool,
) -> Result<(), String> {
    for operation in block {
        match operation {
            Operation::Depth(_, _)
            | Operation::QueueDiagnostic(_, _)
            | Operation::Intrinsic(_, _, _)
            | Operation::Enqueue(_, _, _, _)
            | Operation::Dequeue(_, _)
            | Operation::Requeue(_, _)
            | Operation::Dup(_, _)
            | Operation::Swap(_, _) => {}
            Operation::FunctionCall(function_name, line, col) => {
                if !functions.iter().any(|candidate| &candidate.name == function_name) {
                    return Err(format!(
                        "Call to unknown function '{}' at {}:{}",
                        function_name, line, col
                    ));
                }
            }
            Operation::If(if_block, maybe_else_block, _, _) => {
                validate_function_calls_in_block(if_block, functions, debug)?;
                if let Some(else_block) = maybe_else_block {
                    validate_function_calls_in_block(else_block, functions, debug)?;
                }
            }
            Operation::While(while_block, _, _) => {
                validate_function_calls_in_block(while_block, functions, debug)?;
            }
        }
    }
    Ok(())
}

/// Maps a type keyword (`any`, `str`, `int`, `bool`) to its [`Datatype`].
fn parse_datatype(word: &str) -> Option<Datatype> {
    match word {
        "any" => Some(Datatype::Any),
        "str" => Some(Datatype::String),
        "int" => Some(Datatype::Int),
        "bool" => Some(Datatype::Bool),
        _ => None,
    }
}

/// Parses one function definition; the `function` keyword itself has already
/// been consumed. The expected shape is `<in types> => <out types> <name> { <body> }`.
///
/// `functions` holds the functions declared so far and is used to reject
/// redeclarations.
fn parse_function(
    tokens_iter: &mut TokenIter,
    functions: &[Function],
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<Function, String> {
    const UNEXPECTED_EOF: &str = "Unexpected end of file while extracting a function";

    let mut ins: Vec<Datatype> = Vec::new();
    loop {
        match tokens_iter.next() {
            None => return Err(UNEXPECTED_EOF.to_string()),
            Some(Token::Keyword(word, line, col)) => {
                if word == "=>" {
                    break;
                }
                match parse_datatype(word) {
                    Some(datatype) => ins.push(datatype),
                    None => {
                        return Err(format!(
                            "Expected input parameters for a function but got {} instead at {}:{}",
                            word, line, col
                        ));
                    }
                }
            }
            Some(literal) => {
                let (line, col) = literal.position();
                return Err(format!(
                    "Expected input parameters for a function but got {:?} instead at {}:{}",
                    literal, line, col
                ));
            }
        }
    }
    if debug {
        println!("ins: {:?}", ins);
    }

    let mut outs: Vec<Datatype> = Vec::new();
    loop {
        match tokens_iter.next() {
            None => return Err(UNEXPECTED_EOF.to_string()),
            Some(Token::Keyword(word, line, col)) => {
                if let Some(datatype) = parse_datatype(word) {
                    outs.push(datatype);
                    continue;
                }
                // The first word that is not a type is the function name.
                if RESERVED_WORDS.contains(&word.as_str()) {
                    return Err(format!(
                        "Expected function name but got {} at {}:{}",
                        word, line, col
                    ));
                }
                if functions.iter().any(|known| &known.name == word) {
                    return Err(format!(
                        "Redeclaration of function '{}' at {}:{}",
                        word, line, col
                    ));
                }
                if intrinsics.contains_key(word.as_str()) {
                    return Err(format!(
                        "Function name {} at {}:{} is already an intrinsic",
                        word, line, col
                    ));
                }
                if debug {
                    println!("outs: {:?}", outs);
                }
                let content = parse_block(tokens_iter, intrinsics, debug)?;
                return Ok(Function {
                    name: word.clone(),
                    ins,
                    outs,
                    content,
                });
            }
            Some(literal) => {
                let (line, col) = literal.position();
                return Err(format!(
                    "Expected output parameters or a function name but got {:?} instead at {}:{}",
                    literal, line, col
                ));
            }
        }
    }
}

/// Removes all function definitions from `tokens` and returns them parsed.
///
/// On success `tokens` contains only the tokens outside of function
/// definitions, in their original order. On error `tokens` is left unchanged.
fn extract_functions(
    tokens: &mut Vec<Token>,
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<Vec<Function>, String> {
    let mut functions: Vec<Function> = Vec::new();
    let mut new_tokens: Vec<Token> = Vec::new();
    let mut tokens_iter = tokens.iter().peekable();
    while let Some(token) = tokens_iter.next() {
        match token {
            Token::Keyword(word, line, col) if word == "function" => {
                if debug {
                    println!("Found a function at {}:{}", line, col);
                }
                let function = parse_function(&mut tokens_iter, &functions, intrinsics, debug)?;
                functions.push(function);
            }
            other => new_tokens.push(other.clone()),
        }
    }
    *tokens = new_tokens;
    Ok(functions)
}

/// Parses a `{ ... }` block; the next token must be the opening brace.
fn parse_block(
    tokens_iter: &mut TokenIter,
    intrinsics: &Intrinsics,
    debug: bool,
) -> Result<Vec<Operation>, String> {
    match tokens_iter.next() {
        Some(Token::Keyword(word, _, _)) if word == "{" => {
            parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug)
        }
        Some(Token::Keyword(word, line, col)) => Err(format!(
            "Expected '{{' to open a block but got {} at {}:{}",
            word, line, col
        )),
        Some(literal) => {
            let (line, col) = literal.position();
            Err(format!(
                "Expected '{{' to open a block but got {:?} at {}:{}",
                literal, line, col
            ))
        }
        None => Err("Expected '{' to open a block".to_string()),
    }
}

/// Parses tokens into operations until `delimiter` is found (and consumed).
///
/// With `delimiter == None` parsing runs to the end of the token stream.
/// Any keyword that is neither an intrinsic nor a built-in word becomes a
/// function call; whether that function exists is checked later.
///
/// # Errors
/// Returns an error if the stream ends before a required delimiter, or on a
/// stray `{` or `function` keyword.
fn parse_until_delimiter(
    tokens_iter: &mut TokenIter,
    intrinsics: &Intrinsics,
    delimiter: Option<&str>,
    debug: bool,
) -> Result<Vec<Operation>, String> {
    let mut operations: Vec<Operation> = Vec::new();
    loop {
        let token = match tokens_iter.next() {
            Some(token) => token,
            None => {
                return if delimiter.is_some() {
                    Err("Reached the end of the file while parsing a block".to_string())
                } else {
                    Ok(operations)
                };
            }
        };
        let operation = match token {
            Token::IntLit(value, line, col) => {
                Operation::Enqueue(Datatype::Int, value.clone(), *line, *col)
            }
            Token::StringLit(value, line, col) => {
                Operation::Enqueue(Datatype::String, value.clone(), *line, *col)
            }
            Token::BoolLit(value, line, col) => {
                Operation::Enqueue(Datatype::Bool, value.clone(), *line, *col)
            }
            Token::Keyword(word, line, col) => {
                let (line, col) = (*line, *col);
                match word.as_str() {
                    name if intrinsics.contains_key(name) => {
                        Operation::Intrinsic(word.clone(), line, col)
                    }
                    "if" => {
                        let if_block = parse_block(tokens_iter, intrinsics, debug)?;
                        let has_else = matches!(
                            tokens_iter.peek(),
                            Some(Token::Keyword(following, _, _)) if following == "else"
                        );
                        let else_block = if has_else {
                            // Consume the `else` keyword that was only peeked at.
                            tokens_iter.next();
                            Some(parse_block(tokens_iter, intrinsics, debug)?)
                        } else {
                            None
                        };
                        Operation::If(if_block, else_block, line, col)
                    }
                    "while" => {
                        Operation::While(parse_block(tokens_iter, intrinsics, debug)?, line, col)
                    }
                    "deq" => Operation::Dequeue(line, col),
                    "req" => Operation::Requeue(line, col),
                    "dup" => Operation::Dup(line, col),
                    "swp" => Operation::Swap(line, col),
                    "depth" => Operation::Depth(line, col),
                    "???" => Operation::QueueDiagnostic(line, col),
                    name if Some(name) == delimiter => return Ok(operations),
                    "{" | "function" => {
                        return Err(format!("Unexpected keyword {} at {}:{}", word, line, col));
                    }
                    _ => Operation::FunctionCall(word.clone(), line, col),
                }
            }
        };
        operations.push(operation);
    }
}

/// Prints the usage line and terminates the process.
fn usage() {
    println!("Usage: kurz -c path/to/file");
    exit(0);
}

/// Turns a finished word into an integer literal, a boolean literal or a keyword.
fn classify_word(word: &str, line: i32, col: i32) -> Token {
    if word.parse::<IntValue>().is_ok() {
        Token::IntLit(word.to_string(), line, col)
    } else if word == "true" || word == "false" {
        Token::BoolLit(word.to_string(), line, col)
    } else {
        Token::Keyword(word.to_string(), line, col)
    }
}

/// Splits `text` into tokens.
///
/// Words are separated by whitespace, string literals are delimited by `"`
/// (with the two-character sequence `\n` replaced by a newline) and `//`
/// starts a comment that runs to the end of the line. A `//` inside a string
/// literal is part of the string. Each token stores the line and column of
/// the character that terminated it.
///
/// # Errors
/// Returns an error for a `"` in the middle of a word and for a string literal
/// that is still open at the end of the input.
fn tokenize(text: &str) -> Result<Vec<Token>, String> {
    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut iter = text.chars().peekable();
    while let Some(ch) = iter.next() {
        let starts_comment = ch == '/' && iter.peek() == Some(&'/');
        match state {
            TokenizerState::Comment => {
                if ch == '\n' {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace => {
                if starts_comment {
                    state = TokenizerState::Comment;
                } else if ch == '"' {
                    state = TokenizerState::Quote;
                } else if !ch.is_whitespace() {
                    state = TokenizerState::Keyword;
                    word.push(ch);
                }
            }
            TokenizerState::Quote => {
                if ch == '"' {
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.replace("\\n", "\n"), line, col));
                    word.clear();
                } else {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword => {
                if starts_comment {
                    // A comment directly after a word ends that word.
                    tokens.push(classify_word(&word, line, col));
                    word.clear();
                    state = TokenizerState::Comment;
                } else if ch.is_whitespace() {
                    tokens.push(classify_word(&word, line, col));
                    word.clear();
                    state = TokenizerState::Whitespace;
                } else if ch == '"' {
                    return Err("Having '\"' in the middle of a word is not allowed".to_string());
                } else {
                    word.push(ch);
                }
            }
        }
        col += 1;
        if ch == '\n' {
            col = 1;
            line += 1;
        }
    }
    match state {
        TokenizerState::Quote => {
            return Err("Encountered EOF before closing string".to_string());
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {}
        // Input that ends without trailing whitespace still has a pending word.
        TokenizerState::Keyword => tokens.push(classify_word(&word, line, col)),
    }
    Ok(tokens)
}