From 5bae80e9aa018294baf25cbe834f58747a2fd73e Mon Sep 17 00:00:00 2001 From: 0x4261756D <–38735823+0x4261756D@users.noreply.github.com> Date: Tue, 3 Jan 2023 17:07:57 +0100 Subject: [PATCH] Assembly generation, WIP --- src/main.rs | 277 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 253 insertions(+), 24 deletions(-) diff --git a/src/main.rs b/src/main.rs index 421fc7d..68ca38b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -246,34 +246,69 @@ fn merge_assemblies(data: &mut AssemblyData, data2: AssemblyData) const ASSEMBLY_LINUX_X64_QUEUE_LENGTH: u32 = 1024; const ASSEMBLY_LINUX_X64_HEADER: &str = "format ELF64 executable 3\n"; -const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r8, r9\n\tcmove r8, r10\n\tcmove r9, r10\n"; +const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r11, r12\n\tcmove r11, r13\n\tcmove r12, r13\n"; const ASSEMBLY_LINUX_X64_EXIT: &str = "\tmov rax, 60\n\tmov rdi, 0\n\tsyscall\n"; +const ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH: u32 = 16384; fn generate_assembly_linux_x64(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> Result<(), std::io::Error> { let mut data = AssemblyData { - arrays: format!("segment readable writeable\n\tqueue: rq {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH), - strings: String::from("segment readable\n"), + arrays: format!("segment readable writeable\n\tqueue: rq {}\n\tdynamic: rb {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH, ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH), + strings: String::from("segment readable\nnewline: db 10\n"), code: String::from("segment executable\n"), }; for array in arrays { data.arrays += format!("\tarr_{}: rq {}\n", array.name, array.length).as_str(); } + merge_assemblies(&mut data, generate_assembly_linux_x64_function("_start", operations, functions, intrinsics, arrays, debug)); + data.code += ASSEMBLY_LINUX_X64_EXIT; for function in functions { merge_assemblies(&mut data, generate_assembly_linux_x64_function(function.name.as_str(), &function.content, functions, intrinsics, arrays, debug)); } - merge_assemblies(&mut data, generate_assembly_linux_x64_function("_start", operations, functions, intrinsics, arrays, debug)); - data.code += ASSEMBLY_LINUX_X64_EXIT; + + if data.code.contains("call intToStr") + { + data.code += "intToStr:\n"; + data.code += "\tmov rax, rdi\n"; + data.code += "\tmov rsi, 10\n"; + data.code += "\txor rdi, rdi\n"; + data.code += "\tintToStringLoop:\n"; + data.code += "\t\tdiv rsi\n"; + data.code += "\t\tadd rdx, 48\n"; + data.code += "\t\tpush rdx\n"; + data.code += "\t\txor rdx, rdx\n"; + data.code += "\t\tinc rdi\n"; + data.code += "\t\tcmp rax, 0\n"; + data.code += "\t\tjne intToStringLoop\n"; + data.code += "\tmov rsi, r14\n"; + data.code += "\tmov qword [dynamic+r14], rdi\n"; + data.code += "\tadd r14, 8\n"; + data.code += "\tintToStringBuildLoop:\n"; + data.code += "\t\tcmp rdi, 0\n"; + data.code += "\t\tje intToStringBuildLoopEnd\n"; + data.code += "\t\tpop rax\n"; + data.code += "\t\tmov byte [dynamic+r14], byte al\n"; + data.code += "\t\tinc r14\n"; + data.code += "\t\tdec rdi\n"; + data.code += "\t\tjmp intToStringBuildLoop\n"; + data.code += "\tintToStringBuildLoopEnd:\n"; + data.code += "\tmov byte [dynamic+r14], 0\n"; + data.code += "\tinc r14\n"; + data.code += "\tlea rax, [dynamic+rsi]\n"; + data.code += "\tret\n"; + } return fs::write("out.asm", format!("{}{}{}{}", ASSEMBLY_LINUX_X64_HEADER, data.code, data.arrays, data.strings)); } -// r8: head -// r9: tail -// r10: base +// r11: head +// r12: tail +// r13: base +// r14: dynamic end +// r15: has to be cleaned fn generate_assembly_linux_x64_block(operations: &Vec, functions: &Vec, intrinsics: &HashMap<&str, (Vec, Vec)>, arrays: &Vec, debug: bool) -> AssemblyData { @@ -290,7 +325,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec, functions: &Ve Operation::Dequeue(line, col) => { data.code += format!("\t;;deq {}:{}\n", line, col).as_str(); - data.code += "\tinc r8\n"; + data.code += "\tinc r11\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } Operation::Enqueue(datatype, value, line, col) => @@ -298,50 +333,243 @@ fn generate_assembly_linux_x64_block(operations: &Vec, functions: &Ve data.code += format!("\t;;enq {:?} {} {}:{}\n", datatype, value, line, col).as_str(); match datatype { - Datatype::Int | Datatype::Bool => + Datatype::Int => { - data.code += format!("\tmov qword [queue+r9], {}\n", value).as_str(); + data.code += format!("\tmov qword [queue+8*r12], {}\n", value).as_str(); + } + Datatype::Bool => + { + data.code += format!("\tmov qword [queue+8*r12], {}\n", if value == "true" { 1 } else { 0 }).as_str(); + } + Datatype::String => + { + data.strings += format!("\tstr_{}_{}: db {}, \"{}\", 0\n", line, col, value.len() + 1, value).as_str(); + data.code += format!("\tlea rax, [str_{}_{}]\n", line, col).as_str(); + data.code += "\tmov [queue+8*r12], rax\n"; } - _ => todo!("enq {:?}", datatype) } - data.code += "\tinc r9\n"; + data.code += "\tinc r12\n"; + } + Operation::Requeue(line, col) => + { + data.code += format!("\t;;req {}:{}\n", line, col).as_str(); + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tmov [queue+8*r12], rax\n"; + data.code += "\tinc r11\n"; } Operation::While(while_operations, line, col) => { data.code += format!("\t;;while {}:{}\n", line, col).as_str(); data.code += format!("while_{}_{}:\n", line, col).as_str(); - data.code += "\tcmp qword [queue+r8], 0\n"; + data.code += "\tcmp qword [queue+8*r11], 0\n"; data.code += format!("\tje while_{}_{}_end\n", line, col).as_str(); - data.code += "\tinc r8\n"; + data.code += "\tinc r11\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; merge_assemblies(&mut data, generate_assembly_linux_x64_block(while_operations, functions, intrinsics, arrays, debug)); data.code += format!("\tjmp while_{}_{}\n", line, col).as_str(); data.code += format!("while_{}_{}_end:\n", line, col).as_str(); - data.code += "\tinc r8\n"; + data.code += "\tinc r11\n"; data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; } + Operation::If(if_operations, maybe_else_operations, line, col) => + { + data.code += format!("\t;;if {}:{}\n", line, col).as_str(); + data.code += "\tcmp qword [queue+8*r11], 0\n"; + data.code += format!("\tjne else_{}_{}\n", line, col).as_str(); + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + merge_assemblies(&mut data, generate_assembly_linux_x64_block(if_operations, functions, intrinsics, arrays, debug)); + data.code += format!("\tjmp if_{}_{}_end\n", line, col).as_str(); + data.code += format!("else_{}_{}:\n", line, col).as_str(); + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + if let Some(else_operations) = maybe_else_operations + { + merge_assemblies(&mut data, generate_assembly_linux_x64_block(else_operations, functions, intrinsics, arrays, debug)); + } + data.code += format!("if_{}_{}_end:\n", line, col).as_str(); + } Operation::Dup(line, col) => { data.code += format!("\t;;dup {}:{}\n", line, col).as_str(); - data.code += "\tmov qword [queue+r9], [queue+r8]\n"; - data.code += "\tinc r9\n"; + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tmov [queue+8*r12], rax\n"; + data.code += "\tinc r12\n"; } Operation::Intrinsic(name, line, col) => { - data.code += format!("\t;;intrinsic {} {}:{}", name, line, col).as_str(); + data.code += format!("\t;;intrinsic {} {}:{}\n", name, line, col).as_str(); match name.as_str() { "print" => { // For now printing numbers directly is unsupported - data.code += "\trax, 1\n"; - data.code += "\trdi, 1\n"; - data.code += "\tmov rsi, [queue+r8]\n"; - data.code += "\tmov " + data.code += "\tmov rax, 1\n"; + data.code += "\tmov rdi, 1\n"; + // load address + data.code += "\tmov rsi, [queue+8*r11]\n"; + // size + data.code += "\tmov rdx, [rsi]\n"; + // data + data.code += "\tlea rsi, [rsi+8]\n"; + // incorporate the null byte + data.code += "\tinc rdx\n"; + data.code += "\tsyscall\n"; + // TODO: factor this out + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + } + "println" => + { + // For now printing numbers directly is unsupported + data.code += "\tmov rax, 1\n"; + data.code += "\tmov rdi, 1\n"; + // load address + data.code += "\tmov rsi, [queue+8*r11]\n"; + // size + data.code += "\tmov rdx, [rsi]\n"; + // data + data.code += "\tlea rsi, [rsi+8]\n"; + // incorporate the null byte + data.code += "\tinc rdx\n"; + data.code += "\tsyscall\n"; + // TODO: factor this out + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + // TODO: Don't syscall twice + data.code += "\tlea rsi, [newline]\n"; + data.code += "\tmov rdx, 1\n"; + data.code += "\tsyscall\n"; + } + "intToStr" => + { + data.code += "\tmov qword rdi, [queue+8*r11]\n"; + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tcall intToStr\n"; + data.code += "\tmov [queue+8*r12], rax\n"; + data.code += "\tinc r12\n"; + } + "-" => + { + data.code += "\tmov qword rax, [queue+8*r11]\n"; + data.code += "\tinc r11\n"; + data.code += "\tmov qword rbx, [queue+8*r11]\n"; + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tsub rax, rbx\n"; + data.code += "\tmov [queue+8*r12], rax\n"; + data.code += "\tinc r12\n"; + } + ">" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tcmp qword rax, [queue+8*r11+1]\n"; + data.code += "\txor rax, rax\n"; + data.code += "\tsetg al\n"; + data.code += "\tmov qword [queue+8*r12], rax\n"; + data.code += "\tadd r11, 2\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tinc r12\n"; + } + "<" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tcmp qword rax, [queue+8*r11+1]\n"; + data.code += "\txor rax, rax\n"; + data.code += "\tsetl al\n"; + data.code += "\tmov qword [queue+8*r12], rax\n"; + data.code += "\tadd r11, 2\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tinc r12\n"; + } + ">=" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tcmp qword rax, [queue+8*r11+1]\n"; + data.code += "\txor rax, rax\n"; + data.code += "\tsetge al\n"; + data.code += "\tmov qword [queue+8*r12], rax\n"; + data.code += "\tadd r11, 2\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tinc r12\n"; + } + "<=" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tcmp qword rax, [queue+8*r11+1]\n"; + data.code += "\txor rax, rax\n"; + data.code += "\tsetle al\n"; + data.code += "\tmov qword [queue+8*r12], rax\n"; + data.code += "\tadd r11, 2\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += "\tinc r12\n"; } _ => todo!("intrinsic {} {}:{}", name, line, col) } } + Operation::Apply(name, word, line, col) => + { + data.code += format!("\t;;apply {}.{} {}:{}\n", name, word, line, col).as_str(); + match word.as_str() + { + "read" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += format!("\tmov qword [queue+8*r12], [arr_{}+rax]\n", name).as_str(); + data.code += "\tinc r12\n"; + } + "write" => + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + data.code += format!("\tmov qword [arr_{}+rax], [queue+8*r11]\n", name).as_str(); + data.code += "\tinc r11\n"; + data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE; + } + _ => todo!("apply {}", word) + } + } + Operation::FunctionCall(name, line, col) => + { + data.code += format!("\t;;func call {} {}:{}\n", name, line, col).as_str(); + let function = functions.iter().find(|x| &x.name == name).unwrap(); + for _ in 0..function.ins.len() + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tmov [queue+8*r12], rax\n"; + data.code += "\tinc r11\n"; + data.code += "\tinc r12\n"; + } + data.code += "\t;; move pointers\n"; + // save the current base + data.code += "\tmov rbx, r13\n"; + // save the current head + data.code += "\tmov r15, r11\n"; + // prepare the layout + data.code += "\tmov r13, r12\n"; + data.code += format!("\tsub r13, {}\n", function.ins.len()).as_str(); + data.code += "\tmov r11, r13\n"; + // call + data.code += format!("\tcall {}\n", name).as_str(); + // move the sub-queue back to the base + for _ in 0..function.outs.len() + { + data.code += "\tmov rax, [queue+8*r11]\n"; + data.code += "\tmov [queue+8*r13], rax\n"; + data.code += "\tinc r11\n"; + data.code += "\tinc r13\n"; + } + // restore the tail + data.code += "\tmov r12, r13\n"; + // restore the base + data.code += "\tmov r13, rbx\n"; + // restore the head + data.code += "\tmov r11, r15\n"; + } _ => todo!("{:?}", operation) } } @@ -357,6 +585,7 @@ fn generate_assembly_linux_x64_function(name: &str, operations: &Vec, strings: String::new(), }; merge_assemblies(&mut data, generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug)); + data.code += "\tret\n"; return data; } @@ -1092,7 +1321,7 @@ fn extract_functions(tokens: &mut Vec, intrinsics: &HashMap<&str, (Vec