Finish enough asm generation to compile tests/while.qbl

This commit is contained in:
0x4261756D 2023-01-03 20:39:08 +01:00
parent 5bae80e9aa
commit 84442e5eb9
2 changed files with 136 additions and 106 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
/target
out*

View File

@@ -77,6 +77,7 @@ enum Operation
Apply(String, String, i32, i32),
Depth(i32, i32),
QueueDiagnostic(i32, i32),
Interrupt(i32, i32),
}
fn main()
@@ -92,6 +93,7 @@ fn main()
("<", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
(">", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
(">=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("<=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("==", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("!=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("&&", (Vec::from([Datatype::Bool, Datatype::Bool]), Vec::from([Datatype::Bool]))),
@@ -246,7 +248,7 @@ fn merge_assemblies(data: &mut AssemblyData, data2: AssemblyData)
const ASSEMBLY_LINUX_X64_QUEUE_LENGTH: u32 = 1024;
const ASSEMBLY_LINUX_X64_HEADER: &str = "format ELF64 executable 3\n";
const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r11, r12\n\tcmove r11, r13\n\tcmove r12, r13\n";
const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r12, r13\n\tcmove r12, r14\n\tcmove r13, r14\n";
const ASSEMBLY_LINUX_X64_EXIT: &str = "\tmov rax, 60\n\tmov rdi, 0\n\tsyscall\n";
const ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH: u32 = 16384;
@@ -255,14 +257,15 @@ fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Func
let mut data = AssemblyData
{
arrays: format!("segment readable writeable\n\tqueue: rq {}\n\tdynamic: rb {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH, ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH),
strings: String::from("segment readable\nnewline: db 10\n"),
strings: String::from("segment readable\n\tnewline: db 10\n"),
code: String::from("segment executable\n"),
};
for array in arrays
{
data.arrays += format!("\tarr_{}: rq {}\n", array.name, array.length).as_str();
}
merge_assemblies(&mut data, generate_assembly_linux_x64_function("_start", operations, functions, intrinsics, arrays, debug));
data.code += "_start:\n";
merge_assemblies(&mut data, generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug));
data.code += ASSEMBLY_LINUX_X64_EXIT;
for function in functions
{
@@ -275,6 +278,7 @@ fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Func
data.code += "\tmov rax, rdi\n";
data.code += "\tmov rsi, 10\n";
data.code += "\txor rdi, rdi\n";
data.code += "\txor rdx, rdx\n";
data.code += "\tintToStringLoop:\n";
data.code += "\t\tdiv rsi\n";
data.code += "\t\tadd rdx, 48\n";
@@ -283,20 +287,20 @@ fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Func
data.code += "\t\tinc rdi\n";
data.code += "\t\tcmp rax, 0\n";
data.code += "\t\tjne intToStringLoop\n";
data.code += "\tmov rsi, r14\n";
data.code += "\tmov qword [dynamic+r14], rdi\n";
data.code += "\tadd r14, 8\n";
data.code += "\tmov rsi, r15\n";
data.code += "\tmov qword [dynamic+r15], rdi\n";
data.code += "\tadd r15, 8\n";
data.code += "\tintToStringBuildLoop:\n";
data.code += "\t\tcmp rdi, 0\n";
data.code += "\t\tje intToStringBuildLoopEnd\n";
data.code += "\t\tpop rax\n";
data.code += "\t\tmov byte [dynamic+r14], byte al\n";
data.code += "\t\tinc r14\n";
data.code += "\t\tmov byte [dynamic+r15], byte al\n";
data.code += "\t\tinc r15\n";
data.code += "\t\tdec rdi\n";
data.code += "\t\tjmp intToStringBuildLoop\n";
data.code += "\tintToStringBuildLoopEnd:\n";
data.code += "\tmov byte [dynamic+r14], 0\n";
data.code += "\tinc r14\n";
data.code += "\tmov byte [dynamic+r15], 0\n";
data.code += "\tinc r15\n";
data.code += "\tlea rax, [dynamic+rsi]\n";
data.code += "\tret\n";
}
@@ -304,11 +308,10 @@ fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Func
return fs::write("out.asm", format!("{}{}{}{}", ASSEMBLY_LINUX_X64_HEADER, data.code, data.arrays, data.strings));
}
// r11: head
// r12: tail
// r13: base
// r14: dynamic end
// r15: has to be cleaned
// r12: head
// r13: tail
// r14: base
// r15: dynamic end
fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> AssemblyData
{
@@ -325,7 +328,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
Operation::Dequeue(line, col) =>
{
data.code += format!("\t;;deq {}:{}\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::Enqueue(datatype, value, line, col) =>
@@ -335,53 +338,56 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
{
Datatype::Int =>
{
data.code += format!("\tmov qword [queue+8*r12], {}\n", value).as_str();
data.code += format!("\tmov qword [queue+8*r13], {}\n", value).as_str();
}
Datatype::Bool =>
{
data.code += format!("\tmov qword [queue+8*r12], {}\n", if value == "true" { 1 } else { 0 }).as_str();
data.code += format!("\tmov qword [queue+8*r13], {}\n", if value == "true" { 1 } else { 0 }).as_str();
}
Datatype::String =>
{
data.strings += format!("\tstr_{}_{}: db {}, \"{}\", 0\n", line, col, value.len() + 1, value).as_str();
data.strings += format!("\tstr_{}_{}: dq {}, \"{}\", 0\n", line, col, value.len(), value).as_str();
data.code += format!("\tlea rax, [str_{}_{}]\n", line, col).as_str();
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tmov [queue+8*r13], rax\n";
}
}
data.code += "\tinc r12\n";
data.code += "\tinc r13\n";
}
Operation::Requeue(line, col) =>
{
data.code += format!("\t;;req {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r11\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r12\n";
data.code += "\tinc r13\n";
}
Operation::While(while_operations, line, col) =>
{
data.code += format!("\t;;while {}:{}\n", line, col).as_str();
data.code += format!("while_{}_{}:\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r11], 0\n";
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tje while_{}_{}_end\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += format!("while_{}_{}:\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(while_operations, functions, intrinsics, arrays, debug));
data.code += format!("\tjmp while_{}_{}\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tjne while_{}_{}\n", line, col).as_str();
data.code += format!("while_{}_{}_end:\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::If(if_operations, maybe_else_operations, line, col) =>
{
data.code += format!("\t;;if {}:{}\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r11], 0\n";
data.code += format!("\tjne else_{}_{}\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tje else_{}_{}\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(if_operations, functions, intrinsics, arrays, debug));
data.code += format!("\tjmp if_{}_{}_end\n", line, col).as_str();
data.code += format!("else_{}_{}:\n", line, col).as_str();
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
if let Some(else_operations) = maybe_else_operations
{
@@ -392,9 +398,9 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
Operation::Dup(line, col) =>
{
data.code += format!("\t;;dup {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r12\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
Operation::Intrinsic(name, line, col) =>
{
@@ -407,7 +413,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r11]\n";
data.code += "\tmov rsi, [queue+8*r12]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
@@ -416,7 +422,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
"println" =>
@@ -425,7 +431,7 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r11]\n";
data.code += "\tmov rsi, [queue+8*r12]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
@@ -434,76 +440,80 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r11\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
// TODO: Don't syscall twice
data.code += "\tmov rax, 1\n";
data.code += "\tlea rsi, [newline]\n";
data.code += "\tmov rdx, 1\n";
data.code += "\tsyscall\n";
}
"intToStr" =>
{
data.code += "\tmov qword rdi, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tcall intToStr\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tmov qword rdi, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += "\tcall intToStr\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
"-" =>
{
data.code += "\tmov qword rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += "\tmov qword rbx, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += "\tmov qword rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += "\tmov qword rbx, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tsub rax, rbx\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r12\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
">" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetg al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovg rbx, rcx\n";
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tadd r12, 2\n";
data.code += "\tinc r13\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
"<" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetl al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovl rbx, rcx\n";
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tadd r12, 2\n";
data.code += "\tinc r13\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
">=" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetge al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovge rbx, rcx\n";
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tadd r12, 2\n";
data.code += "\tinc r13\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
"<=" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tcmp qword rax, [queue+8*r11+1]\n";
data.code += "\txor rax, rax\n";
data.code += "\tsetle al\n";
data.code += "\tmov qword [queue+8*r12], rax\n";
data.code += "\tadd r11, 2\n";
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovle rbx, rcx\n";
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tadd r12, 2\n";
data.code += "\tinc r13\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tinc r12\n";
}
_ => todo!("intrinsic {} {}:{}", name, line, col)
}
@@ -515,19 +525,19 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
{
"read" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword [queue+8*r12], [arr_{}+rax]\n", name).as_str();
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword [queue+8*r13], [arr_{}+rax]\n", name).as_str();
data.code += "\tinc r13\n";
}
"write" =>
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tinc r11\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword [arr_{}+rax], [queue+8*r11]\n", name).as_str();
data.code += "\tinc r11\n";
data.code += format!("\tmov qword [arr_{}+rax], [queue+8*r12]\n", name).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
_ => todo!("apply {}", word)
@@ -539,36 +549,42 @@ fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Ve
let function = functions.iter().find(|x| &x.name == name).unwrap();
for _ in 0..function.ins.len()
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r12], rax\n";
data.code += "\tinc r11\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r12\n";
data.code += "\tinc r13\n";
}
data.code += "\t;; move pointers\n";
// save the current base
data.code += "\tmov rbx, r13\n";
data.code += "\tpush r14\n";
// save the current head
data.code += "\tmov r15, r11\n";
data.code += "\tpush r12\n";
// prepare the layout
data.code += "\tmov r13, r12\n";
data.code += format!("\tsub r13, {}\n", function.ins.len()).as_str();
data.code += "\tmov r11, r13\n";
data.code += "\tmov r14, r13\n";
data.code += format!("\tsub r14, {}\n", function.ins.len()).as_str();
data.code += "\tmov r12, r14\n";
// call
data.code += format!("\tcall {}\n", name).as_str();
// move the sub-queue back to the base
for _ in 0..function.outs.len()
{
data.code += "\tmov rax, [queue+8*r11]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r11\n";
data.code += "\tinc r13\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r14], rax\n";
data.code += "\tinc r12\n";
data.code += "\tinc r14\n";
}
// restore the tail
data.code += "\tmov r12, r13\n";
// restore the base
data.code += "\tmov r13, rbx\n";
data.code += "\tmov r13, r14\n";
// restore the head
data.code += "\tmov r11, r15\n";
data.code += "\tpop r12\n";
// restore the base
data.code += "\tpop r14\n";
}
Operation::Interrupt(line, col) =>
{
data.code += "lea r8, [queue]\n";
data.code += format!("mov r9, {}\n", 1000*line + col).as_str();
data.code += "int3\n";
}
_ => todo!("{:?}", operation)
}
@@ -684,17 +700,23 @@ fn interpret_program(operations: &Vec<Operation>, queue: &mut Vec<String>, funct
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first > second).to_string());
}
"<" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first < second).to_string());
}
">=" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first >= second).to_string());
}
"<" =>
"<=" =>
{
let first = queue.remove(0).parse::<i64>().unwrap();
let second = queue.remove(0).parse::<i64>().unwrap();
queue.push((first < second).to_string());
queue.push((first <= second).to_string());
}
"==" =>
{
@@ -802,6 +824,7 @@ fn interpret_program(operations: &Vec<Operation>, queue: &mut Vec<String>, funct
{
println!("---Queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, queue.len(), queue);
}
Operation::Interrupt(_, _) => {}
}
if debug
{
@@ -848,6 +871,7 @@ fn typecheck_block(operations: &Vec<Operation>, ins: &Vec<Datatype>, outs: &Vec<
{
match operation
{
Operation::Interrupt(line, col) |
Operation::Enqueue(_, _, line, col) |
Operation::Dequeue(line, col) |
Operation::Requeue(line, col) |
@@ -1026,6 +1050,7 @@ fn get_return_type(operations: &Vec<Operation>, ins: &Vec<Datatype>, functions:
{
println!("---Type queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, type_queue.len(), type_queue);
}
Operation::Interrupt(_, _) => {}
Operation::Apply(name, word, line, col) =>
{
match word.as_str()
@@ -1097,7 +1122,7 @@ fn validate_function_calls_in_block(block: &Vec<Operation>, functions: &Vec<Func
match operation
{
Operation::Depth(_, _) | Operation::QueueDiagnostic(_, _) | Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) |
Operation::Requeue(_, _) | Operation::Dup(_, _) | Operation::Swap(_, _) | Operation::Apply(_, _, _, _) => {},
Operation::Requeue(_, _) | Operation::Dup(_, _) | Operation::Swap(_, _) | Operation::Apply(_, _, _, _) | Operation::Interrupt(_, _) => {},
Operation::FunctionCall(function_name, line, col) =>
{
if !functions.iter().any(|x| &x.name == function_name)
@@ -1449,6 +1474,10 @@ fn parse_until_delimiter(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, in
{
operations.push(Operation::QueueDiagnostic(*line, *col));
}
else if word == "interrupt"
{
operations.push(Operation::Interrupt(*line, *col));
}
else if Some(word.as_str()) == delimiter
{
return Ok(operations);