Assembly generation, WIP

This commit is contained in:
0x4261756D 2023-01-03 17:07:57 +01:00
parent fbccc9dd15
commit 5bae80e9aa
1 changed files with 253 additions and 24 deletions

View File

@ -246,34 +246,69 @@ fn merge_assemblies(data: &mut AssemblyData, data2: AssemblyData)
// Number of 8-byte slots reserved for the runtime queue (`queue: rq N`).
const ASSEMBLY_LINUX_X64_QUEUE_LENGTH: u32 = 1024;
// First line of every generated file: tells the assembler to emit an ELF64 executable.
const ASSEMBLY_LINUX_X64_HEADER: &str = "format ELF64 executable 3\n";
// Emitted after the head (r11) has been advanced: when head == tail (r12) the
// queue is empty, so both indices are moved back to the base (r13).
// There must be exactly one definition of this constant; it uses the
// r11/r12/r13 register assignment that the rest of the generator relies on.
const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r11, r12\n\tcmove r11, r13\n\tcmove r12, r13\n";
// exit(0): syscall number 60 in rax, status 0 in rdi.
const ASSEMBLY_LINUX_X64_EXIT: &str = "\tmov rax, 60\n\tmov rdi, 0\n\tsyscall\n";
// Number of bytes reserved for the `dynamic` scratch area (`dynamic: rb N`),
// which the generated `intToStr` routine fills with string data.
const ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH: u32 = 16384;
// Generates the complete Linux x86-64 assembly program and writes it to "out.asm".
//
// Layout of the emitted file: header, executable segment (all user functions,
// then `_start` followed by the exit syscall, then the `intToStr` runtime
// helper if it is called anywhere), writeable data segment (queue, dynamic
// scratch area, user arrays), read-only segment (string constants).
//
// operations: top-level program, emitted as `_start`.
// functions:  user functions, each emitted under its own name.
// intrinsics, arrays, debug: forwarded unchanged to the per-function generator;
//             `arrays` additionally gets one `arr_<name>: rq <length>` reservation each.
//
// Returns the result of writing "out.asm"; any I/O error is passed to the caller.
fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), std::io::Error>
{
	// Runtime helper appended only when the program calls it.
	// In:  rdi = value to convert (divided with unsigned `div`).
	// Out: rax = address of the new string inside `dynamic`.
	// The string is laid out as: 8-byte digit count, the ASCII digits, a 0 byte.
	// r14 is the running offset into `dynamic` and is advanced past the new string.
	const INT_TO_STR_ROUTINE: &str = concat!(
		"intToStr:\n",
		"\tmov rax, rdi\n",
		"\tmov rsi, 10\n",
		// rdi now counts the digits produced
		"\txor rdi, rdi\n",
		"\tintToStringLoop:\n",
		// `div` divides rdx:rax, so rdx must be zero before EVERY division,
		// including the first one (it previously held whatever the caller left there)
		"\t\txor rdx, rdx\n",
		"\t\tdiv rsi\n",
		// remainder -> ASCII digit; digits come out least significant first,
		// so they are pushed and later popped to reverse them
		"\t\tadd rdx, 48\n",
		"\t\tpush rdx\n",
		"\t\tinc rdi\n",
		"\t\tcmp rax, 0\n",
		"\t\tjne intToStringLoop\n",
		// remember where this string starts, then store its length
		"\tmov rsi, r14\n",
		"\tmov qword [dynamic+r14], rdi\n",
		"\tadd r14, 8\n",
		"\tintToStringBuildLoop:\n",
		"\t\tcmp rdi, 0\n",
		"\t\tje intToStringBuildLoopEnd\n",
		"\t\tpop rax\n",
		"\t\tmov byte [dynamic+r14], byte al\n",
		"\t\tinc r14\n",
		"\t\tdec rdi\n",
		"\t\tjmp intToStringBuildLoop\n",
		"\tintToStringBuildLoopEnd:\n",
		// terminating 0 byte
		"\tmov byte [dynamic+r14], 0\n",
		"\tinc r14\n",
		"\tlea rax, [dynamic+rsi]\n",
		"\tret\n",
	);

	// Each field is initialised exactly once; the data segments declare the
	// `dynamic` and `newline` symbols that the generated code references.
	let mut data = AssemblyData
	{
		arrays: format!("segment readable writeable\n\tqueue: rq {}\n\tdynamic: rb {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH, ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH),
		strings: String::from("segment readable\nnewline: db 10\n"),
		code: String::from("segment executable\n"),
	};
	for array in arrays
	{
		data.arrays += format!("\tarr_{}: rq {}\n", array.name, array.length).as_str();
	}
	for function in functions
	{
		merge_assemblies(&mut data, generate_assembly_linux_x64_function(function.name.as_str(), &function.content, functions, intrinsics, arrays, debug));
	}
	// `_start` and the exit sequence are emitted exactly once, after the user functions.
	merge_assemblies(&mut data, generate_assembly_linux_x64_function("_start", operations, functions, intrinsics, arrays, debug));
	data.code += ASSEMBLY_LINUX_X64_EXIT;
	if data.code.contains("call intToStr")
	{
		data.code += INT_TO_STR_ROUTINE;
	}
	return fs::write("out.asm", format!("{}{}{}{}", ASSEMBLY_LINUX_X64_HEADER, data.code, data.arrays, data.strings));
}
// r8: head
// r9: tail
// r10: base
// r11: head
// r12: tail
// r13: base
// r14: dynamic end
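// r15: scratch, holds the caller's head across a function call (has to be cleaned up; presumably a nested call overwrites it, TODO confirm)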
fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> AssemblyData
{
@ -290,7 +325,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
Operation::Dequeue(line, col) =>
{
data.code += format!("\t;;deq {}:{}\n", line, col).as_str();
data.code += "\tinc r8\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::Enqueue(datatype, value, line, col) =>
@ -298,50 +333,243 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
data.code += format!("\t;;enq {:?} {} {}:{}\n", datatype, value, line, col).as_str();
match datatype
{
Datatype::Int | Datatype::Bool =>
Datatype::Int =>
{
data.code += format!("\tmov qword [queue+r9], {}\n", value).as_str();
data.code += format!("\tmov qword [queue+8*r12], {}\n", value).as_str();
}
Datatype::Bool =>
{
data.code += format!("\tmov qword [queue+8*r12], {}\n", if value == "true" { 1 } else { 0 }).as_str();
}
Datatype::String =>
{
data.strings += format!("\tstr_{}_{}: db {}, \"{}\", 0\n", line, col, value.len() + 1, value).as_str();
data.code += format!("\tlea rax, [str_{}_{}]\n", line, col).as_str();
data.code += "\tmov [queue+8*r12], rax\n";
}
_ => todo!("enq {:?}", datatype)
}
data.code += "\tinc r9\n";
data.code += "\tinc r12\n";
}
Operation::Requeue(line, col) =>
{
data.code += format!("\t;;req {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r11\n";
}
Operation::While(while_operations, line, col) =>
{
data.code += format!("\t;;while {}:{}\n", line, col).as_str();
data.code += format!("while_{}_{}:\n", line, col).as_str();
data.code += "\tcmp qword [queue+r8], 0\n";
data.code += "\tcmp qword [queue+8*r11], 0\n";
data.code += format!("\tje while_{}_{}_end\n", line, col).as_str();
data.code += "\tinc r8\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(while_operations, functions, intrinsics, arrays, debug));
data.code += format!("\tjmp while_{}_{}\n", line, col).as_str();
data.code += format!("while_{}_{}_end:\n", line, col).as_str();
data.code += "\tinc r8\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::If(if_operations, maybe_else_operations, line, col) =>
{
data.code += format!("\t;;if {}:{}\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r11], 0\n";
data.code += format!("\tjne else_{}_{}\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(if_operations, functions, intrinsics, arrays, debug));
data.code += format!("\tjmp if_{}_{}_end\n", line, col).as_str();
data.code += format!("else_{}_{}:\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
if let Some(else_operations) = maybe_else_operations
{
merge_assemblies(&mut data, generate_assembly_linux_x64_block(else_operations, functions, intrinsics, arrays, debug));
}
data.code += format!("if_{}_{}_end:\n", line, col).as_str();
}
Operation::Dup(line, col) =>
{
data.code += format!("\t;;dup {}:{}\n", line, col).as_str();
data.code += "\tmov qword [queue+r9], [queue+r8]\n";
data.code += "\tinc r9\n";
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r12\n";
}
Operation::Intrinsic(name, line, col) =>
{
data.code += format!("\t;;intrinsic {} {}:{}", name, line, col).as_str();
data.code += format!("\t;;intrinsic {} {}:{}\n", name, line, col).as_str();
match name.as_str()
{
"print" =>
{
// For now printing numbers directly is unsupported
data.code += "\trax, 1\n";
data.code += "\trdi, 1\n";
data.code += "\tmov rsi, [queue+r8]\n";
data.code += "\tmov "
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r11]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
data.code += "\tlea rsi, [rsi+8]\n";
// incorporate the null byte
data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
"println" =>
{
// For now printing numbers directly is unsupported
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r11]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
data.code += "\tlea rsi, [rsi+8]\n";
// incorporate the null byte
data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
// TODO: Don't syscall twice
data.code += "\tlea rsi, [newline]\n";
data.code += "\tmov rdx, 1\n";
data.code += "\tsyscall\n";
}
"intToStr" =>
{
data.code += "\tmov qword rdi, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tcall intToStr\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r12\n";
}
"-" =>
{
data.code += "\tmov qword rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += "\tmov qword rbx, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tsub rax, rbx\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r12\n";
}
">" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetg al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
"<" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetl al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
">=" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetge al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
"<=" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetle al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
_ => todo!("intrinsic {} {}:{}", name, line, col)
}
}
Operation::Apply(name, word, line, col) =>
{
data.code += format!("\t;;apply {}.{} {}:{}\n", name, word, line, col).as_str();
match word.as_str()
{
"read" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword [queue+8*r12], [arr_{}+rax]\n", name).as_str();
data.code += "\tinc r12\n";
}
"write" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword [arr_{}+rax], [queue+8*r11]\n", name).as_str();
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
_ => todo!("apply {}", word)
}
}
Operation::FunctionCall(name, line, col) =>
{
data.code += format!("\t;;func call {} {}:{}\n", name, line, col).as_str();
let function = functions.iter().find(|x| &x.name == name).unwrap();
for _ in 0..function.ins.len()
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
}
data.code += "\t;; move pointers\n";
// save the current base
data.code += "\tmov rbx, r13\n";
// save the current head
data.code += "\tmov r15, r11\n";
// prepare the layout
data.code += "\tmov r13, r12\n";
data.code += format!("\tsub r13, {}\n", function.ins.len()).as_str();
data.code += "\tmov r11, r13\n";
// call
data.code += format!("\tcall {}\n", name).as_str();
// move the sub-queue back to the base
for _ in 0..function.outs.len()
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r11\n";
data.code += "\tinc r13\n";
}
// restore the tail
data.code += "\tmov r12, r13\n";
// restore the base
data.code += "\tmov r13, rbx\n";
// restore the head
data.code += "\tmov r11, r15\n";
}
_ => todo!("{:?}", operation)
}
}
@ -357,6 +585,7 @@ fn generate_assembly_linux_x64_function(name: &str, operations: &Vec<Operation>,
strings: String::new(),
};
merge_assemblies(&mut data, generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug));
data.code += "\tret\n";
return data;
}
@ -1092,7 +1321,7 @@ fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Da
if debug
{
println!("outs: {:?}", outs);
}
}
let block = parse_block(&mut tokens_iter, intrinsics, debug)?;
functions.push(Function {name: word.clone(), ins, outs, content: block});
break;