use core::panic; use std::collections::HashMap; use std::env; use std::fs; use std::iter::Peekable; use std::path::PathBuf; use std::process::Command; use std::process::Stdio; use std::process::exit; use std::time::Instant; #[derive(Debug, Clone, PartialEq)] enum Token { StringLit(String, i32, i32), IntLit(String, i32, i32), BoolLit(String, i32, i32), Keyword(String, i32, i32), Apply(String, String, i32, i32), Import(i32, i32), } enum TokenizerState { Whitespace, Quote, Keyword, Comment, } #[derive(Debug,Clone,Copy, PartialEq)] enum Datatype { Int, String, Bool, //Pointer, // Any, } // impl PartialEq for Datatype // { // fn eq(&self, other: &Self) -> bool // { // core::mem::discriminant(self) == core::mem::discriminant(&Datatype::Any) || // core::mem::discriminant(other) == core::mem::discriminant(&Datatype::Any) || // core::mem::discriminant(self) == core::mem::discriminant(other) // } // } #[derive(Debug)] struct Function { name: String, ins: Vec, outs: Vec, content: Vec, } #[derive(Debug)] struct Arr { name: String, datatype: Datatype, length: i64, data: Vec, } #[derive(Debug)] enum Operation { Enqueue(Datatype, String, i32, i32), Dequeue(i32, i32), // TODO: req can be implemented in terms of dup and dequeue Requeue(i32, i32), Swap(i32, i32), Dup(i32, i32), Intrinsic(String, i32, i32), FunctionCall(String, i32, i32), If(Vec, Option>, i32, i32), While(Vec, i32, i32), Apply(String, String, i32, i32), Depth(i32, i32), QueueDiagnostic(i32, i32), Interrupt(i32, i32), } fn main() { let intrinsics: HashMap<&str, (Vec, Vec)> = HashMap::from( [ ("print", (Vec::from([Datatype::String]), Vec::new())), ("println", (Vec::from([Datatype::String]), Vec::new())), ("intToStr", (Vec::from([Datatype::Int]), Vec::from([Datatype::String]))), ("-", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))), ("+", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))), ("*", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))), ("<", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), (">", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), (">=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), ("<=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), ("==", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), ("!=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))), ("&&", (Vec::from([Datatype::Bool, Datatype::Bool]), Vec::from([Datatype::Bool]))), ]); let args: Vec = env::args().collect(); if args.len() < 2 { usage() } let mut debug = false; let mut interpret = false; let mut run = false; for arg in &args[3..] { match arg.as_str() { "-d" | "--debug" => debug = true, "-i" | "--interpret" => interpret = true, "-r" | "--run" => run = true, _ => panic!("Unknown option {}", arg), } } match args[1].as_str() { "-t" | "--test" => { let mut count = 0; for f in fs::read_dir(&args[2]).unwrap() { let f = f.unwrap(); let file_content = fs::read_to_string(f.path()).unwrap().replace("\r\n", "\n"); println!("========NOW TESTING {:?}========", f.path()); match compile(&file_content, f.path().to_str().unwrap(), &intrinsics, interpret, run, debug) { Ok(maybe_msg) => { println!("---Successfully parsed {:?}---", f.path()); if let Some(msg) = &maybe_msg { print!("---Output---\n'{}'\n", msg); } let expected = &format!("//valid,{}:END:", maybe_msg.unwrap_or(String::new()).replace("\n", "\n//")); if file_content.starts_with(expected) { println!("===PASSED==="); count += 1; } else if let Some(index) = file_content.find(":END:") { let expected_output = file_content[8..index].replace("\n//", "\n"); println!("\n===FAILED===\nExpected the output to be\n'{}'\n({})", expected_output, expected); } else { panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name()); } } Err(msg) => { println!("ERROR: {}", msg); if file_content.starts_with(&format!("//invalid,{}:END:", msg.replace("\n", "\n//"))) { println!("===PASSED==="); count += 1; } else if file_content.starts_with("//invalid,") { if let Some(index) = file_content.find(":END:") { let expected = &format!("//invalid,{}:END:", msg.replace("\n", "\n//")); let expected_output = file_content[10..index].replace("\n//", "\n"); println!("\n===FAILED===\nExpected the output to be\n'{}'\n({})", expected_output, expected); } else { panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name()); } } else { println!("Unexpected error"); } } } } println!("\n\n=========RESULT=========\n{}/{}", count, fs::read_dir(&args[2]).unwrap().count()); } "-c" | "--compile" => { let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file"); match compile(&file_content, &args[2], &intrinsics, interpret, run, debug) { Ok(maybe_msg) => { if let Some(msg) = maybe_msg { print!("---Output---\n\n{}", msg); } } Err(msg) => println!("ERROR: {}", msg), } } _ => panic!("Unknown option {}", args[1]) } } fn compile(file_content: &String, file_path: &str, intrinsics: &HashMap<&str, (Vec, Vec)>, interpret: bool, run: bool, debug: bool) -> Result, String> { let mut tokens: Vec = tokenize(&file_content)?; println!("---Done tokenizing, got {} tokens---", tokens.len()); let mut functions: Vec = extract_functions(&mut tokens, &intrinsics, debug)?; println!("---Done extracting functions, got {} functions and reduced the token count to {}---", functions.len(), tokens.len()); resolve_imports(&mut tokens, &mut functions, file_path, &mut Vec::from([PathBuf::from(file_path)]), intrinsics, debug)?; println!("---Done importing files---"); let mut arrays: Vec = extract_arrays(&mut tokens, &intrinsics, &functions, debug)?; println!("---Done extracting arrays, got {} arrays and reduced the token count to {}---", arrays.len(), tokens.len()); let operations = parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug)?; println!("---Done parsing tokens into {} operations---", operations.len()); validate_function_calls(&operations, &functions, &arrays, debug)?; println!("---Done validating function calls---"); typecheck(&operations, &functions, &intrinsics, &arrays, debug)?; println!("---Done typechecking---"); let start = Instant::now(); let output = if interpret { println!("---Starting to interpret the program---"); Some(interpret_program(&operations, &mut Vec::new(), &functions, &mut arrays, &intrinsics, debug)?) } else { None }; if !interpret { if let Err(err) = generate_assembly_linux_x64(&operations, &functions, &intrinsics, &arrays, debug) { return Err(err.to_string()); } let mut fasm_process = match Command::new("fasm").arg("out.asm").spawn() { Ok(process) => process, Err(err) => return Err(format!("Fasm process error: {}", err.to_string())), }; match fasm_process.wait() { Ok(status) => { if !status.success() { return Err(format!("fasm exited with an error: {}", status)); } } Err(err) => return Err(err.to_string()), } } if run { let process = match Command::new("./out").stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() { Ok(process) => process, Err(err) => return Err(err.to_string()), }; return match process.wait_with_output() { Ok(output) => { match String::from_utf8(output.stdout) { Ok(stdout) => { match String::from_utf8(output.stderr) { Ok(stderr) => { let text = format!("{}{}", stdout, stderr); match output.status.code() { Some(0) => Ok(Some(text)), _ => Err(text), } } Err(err) => Err(err.to_string()), } } Err(err) => Err(err.to_string()), } } Err(err) => Err(err.to_string()), }; } println!("---Done after {:?}---", start.elapsed()); return Ok(output); } fn resolve_imports(tokens: &mut Vec, functions: &mut Vec, file_path: &str, visited_paths: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Result<(), String> { let mut tokens_iter = tokens.iter(); let mut new_tokens: Vec = Vec::new(); while let Some(token) = tokens_iter.next() { if let Token::Import(line, col) = token { if let Some(Token::StringLit(import_path, _, _)) = tokens_iter.next() { match fs::canonicalize(format!("{}/{}", PathBuf::from(file_path).parent().unwrap_or(&PathBuf::from(".")).display(), import_path)) { Ok(full_import_path) => { if visited_paths.contains(&full_import_path) { println!("--Already visited {}--", full_import_path.display()); } else { visited_paths.push(full_import_path.clone()); let maybe_file_content = fs::read_to_string(full_import_path); match maybe_file_content { Ok(file_content) => { let mut import_tokens: Vec = tokenize(&file_content)?; println!("--Done tokenizing the imported file at {}:{}, got {} tokens--", line, col, tokens.len()); let import_functions = extract_functions(&mut import_tokens, &intrinsics, debug)?; resolve_imports(&mut import_tokens, functions, file_path, visited_paths, intrinsics, debug)?; println!("--Done extracting {} functions--", import_functions.len()); functions.extend(import_functions); println!("--Now totalling {} functions--", functions.len()); } Err(e) => return Err(format!("{}: {}", line!(), e.to_string())) } } } Err(e) => return Err(format!("{}: {} {}/{}", line!(), e.to_string(), file_path, import_path)) } } else { return Err(format!("Expected an import location at {}:{}", line, col)); } } else { new_tokens.push(token.clone()); } } tokens.clear(); tokens.extend_from_slice(&new_tokens); return Ok(()); } struct AssemblyData { strings: String, code: String, arrays: String, } fn merge_assemblies(data: &mut AssemblyData, data2: AssemblyData) { data.arrays += data2.arrays.as_str(); data.code += data2.code.as_str(); data.strings += data2.strings.as_str(); } const ASSEMBLY_LINUX_X64_QUEUE_LENGTH: u32 = 1024; const ASSEMBLY_LINUX_X64_HEADER: &str = "format ELF64 executable 3\n"; const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r12, r13\n\tcmove r12, r14\n\tcmove r13, r14\n"; const ASSEMBLY_LINUX_X64_EXIT: &str = "\tmov rax, 60\n\tmov rdi, 0\n\tsyscall\n"; const ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH: u32 = 16384; // r12: head // r13: tail // r14: base // r15: dynamic end fn generate_assembly_linux_x64(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> Result<(), std::io::Error> { let mut data = AssemblyData { arrays: format!("segment readable writeable\n\tqueue: rq {}\n\tdynamic: rb {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH, ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH), strings: String::from("segment readable\n\tnewline: db 10\n"), code: String::from("segment executable\n"), }; for array in arrays { data.arrays += format!("\tarr_{}: rq {}\n", array.name, array.length).as_str(); } data.code += "_start:\n"; merge_assemblies(&mut data, generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug)); data.code += ASSEMBLY_LINUX_X64_EXIT; for function in functions { merge_assemblies(&mut data, generate_assembly_linux_x64_function(function.name.as_str(), &function.content, functions, intrinsics, arrays, debug)); } if data.code.contains("call intToStr") { data.code += "intToStr:\n"; data.code += "\tmov rax, rdi\n"; data.code += "\tmov rsi, 10\n"; data.code += "\txor rdi, rdi\n"; data.code += "\txor rdx, rdx\n"; data.code += "\tintToStringLoop:\n"; data.code += "\t\tdiv rsi\n"; data.code += "\t\tadd rdx, 48\n"; data.code += "\t\tpush rdx\n"; data.code += "\t\txor rdx, rdx\n"; data.code += "\t\tinc rdi\n"; data.code += "\t\tcmp rax, 0\n"; data.code += "\t\tjne intToStringLoop\n"; data.code += "\tmov rsi, r15\n"; data.code += "\tmov qword [dynamic+r15], rdi\n"; data.code += "\tadd r15, 8\n"; data.code += "\tintToStringBuildLoop:\n"; data.code += "\t\tcmp rdi, 0\n"; data.code += "\t\tje intToStringBuildLoopEnd\n"; data.code += "\t\tpop rax\n"; data.code += "\t\tmov byte [dynamic+r15], byte al\n"; data.code += "\t\tinc r15\n"; data.code += "\t\tdec rdi\n"; data.code += "\t\tjmp intToStringBuildLoop\n"; data.code += "\tintToStringBuildLoopEnd:\n"; data.code += "\tmov byte [dynamic+r15], 0\n"; data.code += "\tinc r15\n"; data.code += "\tlea rax, [dynamic+rsi]\n"; data.code += "\tret\n"; } if data.code.contains("exception_array_read_out_of_bounds") { data.strings += "\texception_array_oob_msg db \"Attempted array out-of-bounds access\", 10\n"; data.code += "exception_array_read_out_of_bounds:\n"; //TODO: report the passed sizes data.code += "\tmov rax, 1\n"; data.code += "\tmov rdi, 2\n"; // size data.code += "\tmov rdx, 37\n"; // data data.code += "\tmov rsi, exception_array_oob_msg\n"; data.code += "\tsyscall\n"; data.code += "\tmov rax, 60\n"; data.code += "\tmov rdi, -1\n"; data.code += "\tsyscall\n"; } return fs::write("out.asm", format!("{}{}{}{}", ASSEMBLY_LINUX_X64_HEADER, data.code, data.arrays, data.strings)); } fn generate_assembly_linux_x64_array_oob_check(length: i64) -> String { let mut data = String::new(); data += "\t\t;;Array bounds check\n"; data += format!("\tcmp qword rax, {}\n", length).as_str(); data += "\tjge exception_array_read_out_of_bounds\n"; data += "\tcmp qword rax, 0\n"; data += "\tjl exception_array_read_out_of_bounds\n"; data += "\t\t;;Array bounds check over\n"; return data.clone(); } fn generate_assembly_linux_x64_block(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> AssemblyData { let mut data = AssemblyData { arrays: String::new(), code: String::new(), strings: String::new(), }; for operation in operations { match operation { Operation::Dequeue(line, col) => { data.code += format!("\t;;deq {}:{}\n", line, col).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } Operation::Enqueue(datatype, value, line, col) => { data.code += format!("\t;;enq {:?} {} {}:{}\n", datatype, value, line, col).as_str(); match datatype { Datatype::Int => { data.code += format!("\tmov qword [queue+8*r13], {}\n", value).as_str(); } Datatype::Bool => { data.code += format!("\tmov qword [queue+8*r13], {}\n", if value == "true" { 1 } else { 0 }).as_str(); } Datatype::String => { data.strings += format!("\tstr_{}_{}: db {}, {}, {}, {}, {}, {}, {}, {}, \"{}\", 0\n", line, col, value.len() % 256, (value.len() >> 8) % 256, (value.len() >> 16) % 256, (value.len() >> 24) % 256, (value.len() >> 32) % 256, (value.len() >> 40) % 256, (value.len() >> 48) % 256, (value.len() >> 56) % 256, value).as_str(); data.code += format!("\tlea rax, [str_{}_{}]\n", line, col).as_str(); data.code += "\tmov [queue+8*r13], rax\n"; } } data.code += "\tinc r13\n"; } Operation::Requeue(line, col) => { data.code += format!("\t;;req {}:{}\n", line, col).as_str(); data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } Operation::Swap(line, col) => { data.code += format!("\t;;swp {}:{}\n", line, col).as_str(); data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tmov rbx, [queue+8*r12+8]\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov [queue+8*r13], rbx\n"; data.code += "\tmov [queue+8*r13+8], rax\n"; data.code += "\tadd r13, 2\n"; } Operation::While(while_operations, line, col) => { data.code += format!("\t;;while {}:{}\n", line, col).as_str(); data.code += "\tcmp qword [queue+8*r12], 0\n"; data.code += format!("\tje while_{}_{}_end\n", line, col).as_str(); data.code += format!("while_{}_{}:\n", line, col).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; merge_assemblies(&mut data, generate_assembly_linux_x64_block(while_operations, functions, intrinsics, arrays, debug)); data.code += "\tcmp qword [queue+8*r12], 0\n"; data.code += format!("\tjne while_{}_{}\n", line, col).as_str(); data.code += format!("while_{}_{}_end:\n", line, col).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } Operation::If(if_operations, maybe_else_operations, line, col) => { data.code += format!("\t;;if {}:{}\n", line, col).as_str(); data.code += "\tcmp qword [queue+8*r12], 0\n"; data.code += format!("\tje else_{}_{}\n", line, col).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; merge_assemblies(&mut data, generate_assembly_linux_x64_block(if_operations, functions, intrinsics, arrays, debug)); data.code += format!("\tjmp if_{}_{}_end\n", line, col).as_str(); data.code += format!("else_{}_{}:\n", line, col).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; if let Some(else_operations) = maybe_else_operations { merge_assemblies(&mut data, generate_assembly_linux_x64_block(else_operations, functions, intrinsics, arrays, debug)); } data.code += format!("if_{}_{}_end:\n", line, col).as_str(); } Operation::Dup(line, col) => { data.code += format!("\t;;dup {}:{}\n", line, col).as_str(); data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } Operation::Intrinsic(name, line, col) => { data.code += format!("\t;;intrinsic {} {}:{}\n", name, line, col).as_str(); match name.as_str() { "print" => { // For now printing numbers directly is unsupported data.code += "\tmov rax, 1\n"; data.code += "\tmov rdi, 1\n"; // load address data.code += "\tmov rsi, [queue+8*r12]\n"; // size data.code += "\tmov rdx, [rsi]\n"; // data data.code += "\tlea rsi, [rsi+8]\n"; // incorporate the null byte //data.code += "\tinc rdx\n"; data.code += "\tsyscall\n"; // TODO: factor this out data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } "println" => { // For now printing numbers directly is unsupported data.code += "\tmov rax, 1\n"; data.code += "\tmov rdi, 1\n"; // load address data.code += "\tmov rsi, [queue+8*r12]\n"; // size data.code += "\tmov rdx, [rsi]\n"; // data data.code += "\tlea rsi, [rsi+8]\n"; // incorporate the null byte //data.code += "\tinc rdx\n"; data.code += "\tsyscall\n"; // TODO: factor this out data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; // TODO: Don't syscall twice data.code += "\tmov rax, 1\n"; data.code += "\tlea rsi, [newline]\n"; data.code += "\tmov rdx, 1\n"; data.code += "\tsyscall\n"; } "intToStr" => { data.code += "\tmov qword rdi, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tcall intToStr\n"; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } "-" => { data.code += "\tmov qword rax, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += "\tmov qword rbx, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tsub rax, rbx\n"; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } "+" => { data.code += "\tmov qword rax, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += "\tmov qword rbx, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tadd rax, rbx\n"; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } "*" => { data.code += "\tmov qword rax, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += "\tmov qword rbx, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmul rbx\n"; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } ">" => { data.code += "\tmov rbx, 0\n"; data.code += "\tmov rcx, 1\n"; data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tcmp qword rax, [queue+8*r12+8]\n"; data.code += "\tcmovg rbx, rcx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } "<" => { data.code += "\tmov rbx, 0\n"; data.code += "\tmov rcx, 1\n"; data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tcmp qword rax, [queue+8*r12+8]\n"; data.code += "\tcmovl rbx, rcx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } ">=" => { data.code += "\tmov rbx, 0\n"; data.code += "\tmov rcx, 1\n"; data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tcmp qword rax, [queue+8*r12+8]\n"; data.code += "\tcmovge rbx, rcx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } "<=" => { data.code += "\tmov rbx, 0\n"; data.code += "\tmov rcx, 1\n"; data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tcmp qword rax, [queue+8*r12+8]\n"; data.code += "\tcmovle rbx, rcx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } "==" => { data.code += "\tmov rbx, 0\n"; data.code += "\tmov rcx, 1\n"; data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tcmp qword rax, [queue+8*r12+8]\n"; data.code += "\tcmove rbx, rcx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } "&&" => { data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tmov rbx, [queue+8*r12+8]\n"; data.code += "\tand rax, rbx\n"; data.code += "\tadd r12, 2\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } _ => todo!("intrinsic {} {}:{}", name, line, col) } } Operation::Apply(name, word, line, col) => { let array = arrays.iter().find(|x| &x.name == name).unwrap(); data.code += format!("\t;;apply {}.{} {}:{}\n", name, word, line, col).as_str(); match word.as_str() { "read" => { data.code += "\tmov rax, [queue+8*r12]\n"; data.code += generate_assembly_linux_x64_array_oob_check(array.length).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += format!("\tmov qword rbx, [arr_{}+8*rax]\n", name).as_str(); data.code += "\tmov qword [queue+8*r13], rbx\n"; data.code += "\tinc r13\n"; } "write" => { data.code += "\tmov rax, [queue+8*r12]\n"; data.code += generate_assembly_linux_x64_array_oob_check(array.length).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov qword rbx, [queue+8*r12]\n"; data.code += format!("\tmov qword [arr_{}+8*rax], rbx\n", name).as_str(); data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } "length" => { data.code += format!("\tmov qword [queue+8*r13], {}\n", array.length).as_str(); data.code += "\tinc r13\n"; } _ => todo!("apply {}", word) } } Operation::FunctionCall(name, line, col) => { data.code += format!("\t;;func call {} {}:{}\n", name, line, col).as_str(); let function = functions.iter().find(|x| &x.name == name).unwrap(); for _ in 0..function.ins.len() { data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tinc r12\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; data.code += "\tmov [queue+8*r13], rax\n"; data.code += "\tinc r13\n"; } data.code += "\t;; move pointers\n"; // save the current base data.code += "\tpush r14\n"; // save the current head data.code += "\tpush r12\n"; // prepare the layout data.code += "\tmov r14, r13\n"; data.code += format!("\tsub r14, {}\n", function.ins.len()).as_str(); data.code += "\tmov r12, r14\n"; // call data.code += format!("\tcall {}\n", name).as_str(); // move the sub-queue back to the base for _ in 0..function.outs.len() { data.code += "\tmov rax, [queue+8*r12]\n"; data.code += "\tmov [queue+8*r14], rax\n"; data.code += "\tinc r12\n"; data.code += "\tinc r14\n"; } // restore the tail data.code += "\tmov r13, r14\n"; // restore the head data.code += "\tpop r12\n"; // restore the base data.code += "\tpop r14\n"; } Operation::Interrupt(line, col) => { data.code += format!("\t;;interrupt {}:{}\n", line, col).as_str(); data.code += "lea r8, [queue]\n"; data.code += format!("mov r9, {}\n", 1000*line + col).as_str(); data.code += "int3\n"; } _ => todo!("{:?}", operation) } } return data; } fn generate_assembly_linux_x64_function(name: &str, operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> AssemblyData { let mut data = AssemblyData { arrays: String::new(), code: format!("{}:\n", name), strings: String::new(), }; merge_assemblies(&mut data, generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug)); data.code += "\tret\n"; return data; } fn interpret_program(operations: &Vec, queue: &mut Vec, functions: &Vec, arrays: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Result { let mut output = String::new(); for operation in operations { if debug { println!("before: {:?}: {:?}, '{}'", operation, queue, output); } match operation { Operation::Dequeue(_, _) => { queue.remove(0); } Operation::Enqueue(_, value, _, _) => { queue.push(value.clone()); } Operation::Requeue(_, _) => { let val = queue.remove(0); queue.push(val); } Operation::Dup(_, _) => { let val = queue.get(0).unwrap(); queue.push(val.clone()); } Operation::Swap(_, _) => { let first = queue.remove(0); let second = queue.remove(0); queue.push(second); queue.push(first); } Operation::FunctionCall(function_name, _, _) => { let function = functions.iter().find(|x| &x.name == function_name).unwrap(); let function_context: &mut Vec = &mut Vec::new(); for _ in 0..function.ins.len() { let val = queue.remove(0); function_context.push(val); } output += interpret_program(&function.content, function_context, functions, arrays, intrinsics, debug)?.as_str(); for val in function_context { queue.push(val.to_string()); } } Operation::If(if_block, maybe_else_block, _, _) => { let val = queue.remove(0); if val == "true" { output += interpret_program(if_block, queue, functions, arrays, intrinsics, debug)?.as_str(); } else if let Some(else_block) = maybe_else_block { output += interpret_program(else_block, queue, functions, arrays, intrinsics, debug)?.as_str(); } } Operation::Intrinsic(intrinsic_name, line, col) => { match intrinsic_name.as_str() { "print" => { output += format!("{}", queue.remove(0)).as_str(); } "-" => { let minuend = queue.remove(0).parse::().unwrap(); let subtrahend = queue.remove(0).parse::().unwrap(); queue.push((minuend - subtrahend).to_string()); } "+" => { let addend1 = queue.remove(0).parse::().unwrap(); let addend2 = queue.remove(0).parse::().unwrap(); queue.push((addend1 + addend2).to_string()); } "*" => { let multiplicant1 = queue.remove(0).parse::().unwrap(); let multiplicant2 = queue.remove(0).parse::().unwrap(); queue.push((multiplicant1 * multiplicant2).to_string()); } ">" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first > second).to_string()); } "<" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first < second).to_string()); } ">=" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first >= second).to_string()); } "<=" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first <= second).to_string()); } "==" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first == second).to_string()); } "!=" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first != second).to_string()); } "&&" => { let first = queue.remove(0).parse::().unwrap(); let second = queue.remove(0).parse::().unwrap(); queue.push((first && second).to_string()); } "println" => { output += format!("{}\n", queue.remove(0)).as_str(); } "intToStr" => { let val = queue.remove(0).clone(); queue.push(val); } _ => { return Err(format!("Unexpected intrinsic '{}' at {}:{}", intrinsic_name, line, col)); } } } Operation::Apply(name, word, line, col) => { let arr: &mut Arr = arrays.iter_mut().find(|x| &x.name == name).unwrap(); match word.as_str() { "write" => { let position: i64 = queue.remove(0).parse::().unwrap(); if position >= arr.length { //return Err(format!("Attempted an out of bounds write for array {} ({} >= {}) at {}:{}", arr.name, position, arr.length, line, col)); return Err(String::from("Attempted array out-of-bounds access\n")); } if position < 0 { //return Err(format!("Attempted an out of bounds write for array {} ({} < 0) at {}:{}", arr.name, position, line, col)); return Err(String::from("Attempted array out-of-bounds access\n")); } let data = queue.remove(0); if debug { println!("write before: {} {} {:?}", position, data, arr); } arr.data[position as usize] = data; if debug { println!("write after: {:?}", arr); } } "read" => { let position: i64 = queue.remove(0).parse::().unwrap(); if position >= arr.length { //return Err(format!("Attempted an out of bounds read for array {} ({} >= {}) at {}:{}", arr.name, position, arr.length, line, col)); return Err(String::from("Attempted array out-of-bounds access\n")); } if position < 0 { //return Err(format!("Attempted an out of bounds read for array {} ({} < 0) at {}:{}", arr.name, position, line, col)); return Err(String::from("Attempted array out-of-bounds access\n")); } queue.push(arr.data[position as usize].clone()); } "length" => { queue.push(arr.length.to_string()); } _ => return Err(format!("Unexpected application '{}' at {}:{}", word, line, col)) } } Operation::While(while_block, _, _) => { loop { let val = queue.remove(0); if val == "false" { break; } output += interpret_program(while_block, queue, functions, arrays, intrinsics, debug)?.as_str(); } } Operation::Depth(_, _) => { let depth = queue.len(); queue.push(depth.to_string()); } Operation::QueueDiagnostic(line, col) => { println!("---Queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, queue.len(), queue); } Operation::Interrupt(_, _) => {} } if debug { println!("after: {:?}: {:?}, '{}'", operation, queue, output); } } return Ok(output); } fn typecheck(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> Result<(), String> { for function in functions { if debug { println!("Now typechecking function '{}'", function.name); } typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, arrays, debug)?; if debug { println!("Successfully typechecked function '{}'", function.name); } } if debug { println!("Now typechecking main operations"); } typecheck_block(operations, &Vec::new(), &Vec::new(), functions, intrinsics, arrays, debug)?; if debug { println!("Successfully typechecked main operations"); } return Ok(()); } fn typecheck_block(operations: &Vec, ins: &Vec, outs: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> Result<(), String> { let actual_outs = get_return_type(operations, ins, functions, intrinsics, arrays, debug)?; if &actual_outs != outs { let (line, col) = match operations.last() { Some(operation) => { match operation { Operation::Interrupt(line, col) | Operation::Enqueue(_, _, line, col) | Operation::Dequeue(line, col) | Operation::Requeue(line, col) | Operation::Dup(line, col) | Operation::Swap(line, col) | Operation::FunctionCall(_, line, col) | Operation::If(_, _, line, col) | Operation::Intrinsic(_, line, col) | Operation::While(_, line, col) | Operation::QueueDiagnostic(line, col) | Operation::Apply(_, _, line, col) | Operation::Depth(line, col) => (*line, *col), } } None => (-1, -1) }; return Err(format!("Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}", outs, actual_outs, line, col)); } return Ok(()); } fn get_return_type(operations: &Vec, ins: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> Result, String> { let type_queue: &mut Vec = &mut Vec::new(); type_queue.extend_from_slice(ins); let mut debug_string = String::from(""); for operation in operations { if debug { debug_string = format!("operation: {:?}: {:?}", operation, type_queue); } match operation { Operation::Dequeue(line, col) => { if type_queue.is_empty() { return Err(format!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col)); } type_queue.remove(0); } Operation::Enqueue(datatype, _, _, _) => { type_queue.push(*datatype); } Operation::Dup(line, col) => { if let Some(typ) = type_queue.get(0) { type_queue.push(typ.clone()); } else { return Err(format!("Attempted to dup an element while the queue was empty at {}:{}", line, col)); } } Operation::Requeue(line, col) => { if type_queue.is_empty() { return Err(format!("Attempted to requeue an element while the queue was empty at {}:{}", line, col)); } let typ = type_queue.remove(0); type_queue.push(typ); } Operation::Swap(line, col) => { if type_queue.is_empty() { return Err(format!("Attempted to get the first element for a swap while the queue was empty at {}:{}", line, col)); } let first_typ = type_queue.remove(0); if type_queue.is_empty() { return Err(format!("Attempted to get the second element for a swap while the queue was empty at {}:{}", line, col)); } let second_typ = type_queue.remove(0); type_queue.push(second_typ); type_queue.push(first_typ); } Operation::FunctionCall(function_name, line, col) => { let function = functions.iter().find(|x| &x.name == function_name).unwrap(); if function.ins.len() > type_queue.len() { return Err(format!("Attempted to call function '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", function.name, line, col, function.ins, type_queue)); } for in_type in &function.ins { let actual_type = type_queue.remove(0); if in_type != &actual_type { return Err(format!("Attempted to call function '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", function.name, line, col, in_type, actual_type)); } } type_queue.extend_from_slice(&function.outs); } Operation::If(if_block, maybe_else_block, line, col) => { if type_queue.is_empty() { return Err(format!("Encountered if block with an empty queue at {}:{}", line, col)); } let comparison_type = type_queue.remove(0); if comparison_type != Datatype::Bool { return Err(format!("Expected a Bool as an if condition but got {:?} instead at {}:{}", comparison_type, line, col)); } if debug { println!("Starting to typecheck if block"); } let if_ret = get_return_type(if_block, &type_queue, functions, intrinsics, arrays, debug)?; let else_ret = if let Some(else_block) = maybe_else_block { if debug { println!("Starting to typecheck else block"); } get_return_type(else_block, &type_queue, functions, intrinsics, arrays, debug)? } else { type_queue.clone() }; if if_ret != else_ret { return Err(format!("Incompatible queue states after if/else construction, expected {:?} but got {:?}", if_ret, else_ret)); } type_queue.clear(); type_queue.extend_from_slice(&if_ret); } Operation::Intrinsic(intrinsic_name, line, col) => { let io = intrinsics.get(intrinsic_name.as_str()).unwrap(); if io.0.len() > type_queue.len() { return Err(format!("Attempted to call intrinsic '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", intrinsic_name, line, col, io.0, type_queue)); } for in_type in &io.0 { let actual_type = type_queue.remove(0); if in_type != &actual_type { return Err(format!("Attempted to call intrinsic '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", intrinsic_name, line, col, in_type, actual_type)); } } type_queue.extend_from_slice(&io.1); } Operation::While(while_block, line, col) => { if type_queue.is_empty() { return Err(format!("Encountered while block with an empty queue at {}:{}", line, col)); } let comparison_type = type_queue.remove(0); if comparison_type != Datatype::Bool { return Err(format!("Expected a Bool as a while condition but got {:?} instead at {}:{}", comparison_type, line, col)); } if debug { println!("Starting to typecheck while block"); } let mut outs = type_queue.clone(); outs.insert(0, Datatype::Bool); typecheck_block(while_block, type_queue, &outs, functions, intrinsics, arrays, debug)?; } Operation::Depth(_, _) => { type_queue.push(Datatype::Int); } Operation::QueueDiagnostic(line, col) => { println!("---Type queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, type_queue.len(), type_queue); } Operation::Interrupt(_, _) => {} Operation::Apply(name, word, line, col) => { match word.as_str() { "write" => { if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int { return Err(format!("Expected a position for a write application at {}:{}", line, col)); } let expected_type = arrays.iter().find(|x| &x.name == name).unwrap().datatype; if type_queue.is_empty() { return Err(format!("Expected data for a write application at {}:{}", line, col)); } let actual_type = type_queue.remove(0); if actual_type != expected_type { return Err(format!("Expected a {:?} value but got a {:?} value at {}:{}", expected_type, actual_type, line, col)); } } "read" => { if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int { return Err(format!("Expected a position for a read application at {}:{}", line, col)); } let typ = arrays.iter().find(|x| &x.name == name).unwrap().datatype; type_queue.push(typ); } "length" => { type_queue.push(Datatype::Int); } _ => return Err(format!("Encountered unknown application '{}' at {}:{}", word, line, col)) } } } if debug { println!("{} => {:?}", debug_string, type_queue); } } return Ok(type_queue.clone()); } fn validate_function_calls(operations: &Vec, functions: &Vec, arrays: &Vec, debug: bool) -> Result<(), String> { for function in functions { validate_function_calls_in_block(&function.content, functions, arrays, debug)?; if debug { println!("Successfully validated function calls in function '{}'", function.name); } } validate_function_calls_in_block(operations, functions, arrays, debug)?; if debug { println!("Successfully validated function calls in main operations"); } return Ok(()); } fn validate_function_calls_in_block(block: &Vec, functions: &Vec, arrays: &Vec, debug: bool) -> Result<(), String> { for operation in block { match operation { Operation::Depth(_, _) | Operation::QueueDiagnostic(_, _) | Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) | Operation::Requeue(_, _) | Operation::Dup(_, _) | Operation::Swap(_, _) | Operation::Apply(_, _, _, _) | Operation::Interrupt(_, _) => {}, Operation::FunctionCall(function_name, line, col) => { if !functions.iter().any(|x| &x.name == function_name) { return Err(format!("Call to unknown function '{}' at {}:{}", function_name, line, col)); } } Operation::If(if_block, maybe_else_block, _, _) => { validate_function_calls_in_block(if_block, functions, arrays, debug)?; if let Some(else_block) = maybe_else_block { validate_function_calls_in_block(else_block, functions, arrays, debug)?; } } Operation::While(while_block, _, _) => { validate_function_calls_in_block(while_block, functions, arrays, debug)?; } } } return Ok(()); } fn extract_arrays(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, functions: &Vec, debug: bool) -> Result, String> { let mut tokens_iter = tokens.iter().peekable(); let mut arrays: Vec = Vec::new(); let mut new_tokens: Vec = Vec::new(); while let Some(token) = tokens_iter.next() { if let Token::Keyword(word, line, col) = token { if word == "arr" { if debug { println!("Found an array at {}:{}", line, col); } if let Some(Token::Keyword(name, _, _)) = tokens_iter.next() { if functions.iter().any(|x| &x.name == name) { return Err(format!("Cannot redeclare an array with the same name as a function {}:{}", line, col)); } if arrays.iter().any(|x| &x.name == name) { return Err(format!("Cannot redeclare an array with the same name as an array {}:{}", line, col)); } if intrinsics.contains_key(name.as_str()) { return Err(format!("An array cannot have the same name as an intrinsic ({}) at {}:{}", name, line, col)); } if let Some(Token::Keyword(open_curly, _, _)) = tokens_iter.next() { if open_curly != "{" { return Err(format!("Expected '{{' in array declaration at {}:{}", line, col)); } } else { return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)); } if let Some(Token::Keyword(typ, _, _)) = tokens_iter.next() { let datatype = str_to_datatype(typ, *line, *col)?; if let Some(Token::IntLit(size_str, _, _)) = tokens_iter.next() { let size = size_str.parse::().unwrap(); if let Some(Token::Keyword(close_curly, _, _)) = tokens_iter.next() { if close_curly != "}" { return Err(format!("Expected '}}' in array declaration at {}:{}", line, col)); } } else { return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)); } let mut data: Vec = Vec::new(); let default_val = match datatype { Datatype::String => String::new(), Datatype::Bool => String::from("false"), Datatype::Int => String::from("0"), }; for _ in 0..size { data.push(default_val.clone()); } arrays.push(Arr { name: sanitize_name(name.clone()), datatype, length: size , data }); } } else { return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)) } } else { return Err(format!("Expected array name, at {}:{}", line, col)); } } else { new_tokens.push(token.clone()); } } else { new_tokens.push(token.clone()); } } tokens.clear(); tokens.extend_from_slice(&new_tokens); return Ok(arrays); } fn sanitize_name(name: String) -> String { return name.replace("-", "_").replace("+", "_"); } fn str_to_datatype(s: &str, line: i32, col: i32) -> Result { match s { //"any" => Ok(Datatype::Any), "bool" => Ok(Datatype::Bool), "int" => Ok(Datatype::Int), "str" => Ok(Datatype::String), _ => return Err(format!("Expected a datatype for the array, got {} instead at {}:{}", s, line, col)) } } fn extract_functions(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Result, String> { let mut tokens_iter = tokens.iter().peekable(); let mut functions: Vec = Vec::new(); let mut new_tokens: Vec = Vec::new(); while let Some(token) = tokens_iter.next() { if let Token::Keyword(word, line, col) = token { if word == "function" { if debug { println!("Found a function at {}:{}", line, col); } let mut ins: Vec = Vec::new(); loop { let maybe_token = tokens_iter.next(); match maybe_token { Some(token) => { match token { Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) | Token::Import(line, col) | Token::Apply(_, _, line, col) => { return Err(format!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col)); } Token::Keyword(word, line, col) => { if word == "=>" { break; } match word.as_str() { //"any" => ins.push(Datatype::Any), "str" => ins.push(Datatype::String), "int" => ins.push(Datatype::Int), "bool" => ins.push(Datatype::Bool), _ => return Err(format!("Expected input parameters for a function but got {} instead at {}:{}", word, line, col)) } } } } None => return Err(format!("Unexpected end of file while extracting a function")) } } if debug { println!("ins: {:?}", ins); } let mut outs: Vec = Vec::new(); loop { let maybe_token = tokens_iter.next(); match maybe_token { Some(token) => { match token { Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) | Token::Import(line, col) | Token::Apply(_, _, line, col) => { return Err(format!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col)); } Token::Keyword(word, line, col) => { match word.as_str() { //"any" => outs.push(Datatype::Any), "str" => outs.push(Datatype::String), "int" => outs.push(Datatype::Int), "bool" => outs.push(Datatype::Bool), "{" | "}" | "deq" | "req" | "dup" | "swp" | "true" | "false" | "depth" | "???" => return Err(format!("Expected function name but got {} at {}:{}", word, line, col)), _ => { if functions.iter().any(|x| &x.name == word) { return Err(format!("Redeclaration of function '{}' at {}:{}", word, line, col)); } if intrinsics.contains_key(word.as_str()) { return Err(format!("Function name {} at {}:{} is already an intrinsic", word, line, col)); } if debug { println!("outs: {:?}", outs); } let block = parse_block(&mut tokens_iter, intrinsics, debug)?; functions.push(Function {name: sanitize_name(word.clone()), ins, outs, content: block}); break; } } } } } None => return Err(format!("Unexpected end of file while extracting a function")) } } } else { new_tokens.push(token.clone()); } } else { new_tokens.push(token.clone()); } } tokens.clear(); tokens.extend_from_slice(&new_tokens); return Ok(functions); } fn parse_block(tokens_iter: &mut Peekable>, intrinsics: &HashMap<&str, (Vec, Vec)>, debug: bool) -> Result, String> { if let Some(Token::Keyword(word, line, col)) = tokens_iter.next() { if word != "{" { return Err(format!("Expected '{{' to open a block but got {} at {}:{}", word, line, col)); } } else { return Err(format!("Expected '{{' to open a block")); } return parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug); } fn parse_until_delimiter(tokens_iter: &mut Peekable>, intrinsics: &HashMap<&str, (Vec, Vec)>, delimiter: Option<&str>, debug: bool) -> Result, String> { let mut operations: Vec = Vec::new(); loop { let maybe_token = tokens_iter.next(); match maybe_token { Some(token) => { match token { Token::IntLit(value, line, col) => { operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col)); } Token::StringLit(value, line, col) => { operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col)); } Token::BoolLit(value, line, col) => { operations.push(Operation::Enqueue(Datatype::Bool, value.clone(), *line, *col)); } Token::Apply(name, word, line, col) => { operations.push(Operation::Apply(sanitize_name(name.clone()), word.clone(), *line, *col)); } Token::Import(line, col) => { return Err(format!("Unexpected import token at {}:{}, should have been resolved before, probably a compiler bug", line, col)); } Token::Keyword(word, line, col) => { if intrinsics.contains_key(word.as_str()) { operations.push(Operation::Intrinsic(word.clone(), *line, *col)); } else if word == "if" { let block = parse_block(tokens_iter, intrinsics, debug)?; let else_block = if let Some(Token::Keyword(maybe_else, _, _)) = tokens_iter.peek() { if maybe_else == "else" { tokens_iter.next(); Some(parse_block(tokens_iter, intrinsics, debug)?) } else { None } } else { None }; operations.push(Operation::If(block, else_block, *line, *col)); } else if word == "while" { operations.push(Operation::While(parse_block(tokens_iter, intrinsics, debug)?, *line, *col)); } else if word == "deq" { operations.push(Operation::Dequeue(*line, *col)); } else if word == "req" { operations.push(Operation::Requeue(*line, *col)); } else if word == "dup" { operations.push(Operation::Dup(*line, *col)); } else if word == "swp" { operations.push(Operation::Swap(*line, *col)); } else if word == "depth" { operations.push(Operation::Depth(*line, *col)); } else if word == "???" { operations.push(Operation::QueueDiagnostic(*line, *col)); } else if word == "interrupt" { operations.push(Operation::Interrupt(*line, *col)); } else if Some(word.as_str()) == delimiter { return Ok(operations); } else if word == "{" || word == "function" { return Err(format!("Unexpected keyword {} at {}:{}", word, line, col)); } else { operations.push(Operation::FunctionCall(sanitize_name(word.clone()), *line, *col)); } } } } None => { if delimiter.is_some() { return Err(format!("Reached the end of the file while parsing a block")); } else { return Ok(operations); } } } } } fn usage() { println!("Usage: kurz -c path/to/file"); exit(0); } fn tokenize(text: &str) -> Result, String> { let mut tokens: Vec = Vec::new(); let mut line = 1; let mut col = 1; let mut state = TokenizerState::Whitespace; let mut word = String::new(); let mut iter = text.chars().peekable(); let mut application_name = String::new(); while let Some(ch) = iter.next() { if ch == '/' && iter.peek() == Some(&'/') { state = TokenizerState::Comment; } match state { TokenizerState::Comment => { if ch == '\n' { state = TokenizerState::Whitespace; } } TokenizerState::Whitespace => { // If ch is whitespace, do nothing if !ch.is_whitespace() { match ch { '"' => { state = TokenizerState::Quote; } _ => { state = TokenizerState::Keyword; word.push(ch); } } } } TokenizerState::Quote => { if ch == '"' { state = TokenizerState::Whitespace; tokens.push(Token::StringLit(word.clone().replace("\\n", "\n"), line, col)); word.clear(); } else { word.push(ch); } } TokenizerState::Keyword => { if ch.is_whitespace() { state = TokenizerState::Whitespace; if application_name.is_empty() { if let Ok(_) = word.parse::() { tokens.push(Token::IntLit(word.clone(), line, col)); } else if word == "true" || word == "false" { tokens.push(Token::BoolLit(word.clone(), line, col)); } else if word == "import" { tokens.push(Token::Import(line, col)); } else { tokens.push(Token::Keyword(word.clone(), line, col)); } } else { tokens.push(Token::Apply(sanitize_name(application_name.clone()), word.clone(), line, col)); application_name.clear(); } word.clear(); } else { match ch { '"' => return Err(format!("Having '\"' in the middle of a word is not allowed")), '.' => { application_name = word.clone(); word.clear(); } _ => { word.push(ch); } } } } } col += 1; if ch == '\n' { col = 1; line += 1; } } match state { TokenizerState::Quote => { return Err(format!("Encountered EOF before closing string")); } TokenizerState::Whitespace | TokenizerState::Comment => {}, TokenizerState::Keyword => { if application_name.is_empty() { tokens.push(Token::Keyword(word.clone(), line, col)); } else { tokens.push(Token::Apply(sanitize_name(application_name.clone()), word.clone(), line, col)); } } } Ok(tokens) }