kurz/src/main.rs

1820 lines
55 KiB
Rust

use core::panic;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::iter::Peekable;
use std::process::Command;
use std::process::Stdio;
use std::process::exit;
use std::time::Instant;
/// A lexical token of the source program.
///
/// Every variant carries its text as `String`(s) followed by two `i32` values.
/// NOTE(review): the two `i32`s are presumably the source line and column
/// (mirroring `Operation`) - confirm against the tokenizer.
#[derive(Debug, Clone, PartialEq)]
enum Token
{
// NOTE(review): the String is presumably the literal's text - confirm against the tokenizer.
StringLit(String, i32, i32),
IntLit(String, i32, i32),
BoolLit(String, i32, i32),
Keyword(String, i32, i32),
// Carries two strings; `Operation::Apply` uses the same shape for (array name, word).
Apply(String, String, i32, i32),
}
/// States of the tokenizer.
/// NOTE(review): the tokenizer is not part of this excerpt; the variant meanings
/// below are inferred from their names only - confirm against `tokenize`.
enum TokenizerState
{
Whitespace,
Quote,
Keyword,
Comment,
}
/// The value types known to the type checker and the code generator.
///
/// Used for intrinsic and function signatures and as the element type of the
/// type queue during type checking.
#[derive(Debug,Clone,Copy, PartialEq)]
enum Datatype
{
Int,
String,
Bool,
// Not (yet) supported variants, kept for reference:
//Pointer,
// Any,
}
// impl PartialEq for Datatype
// {
// fn eq(&self, other: &Self) -> bool
// {
// core::mem::discriminant(self) == core::mem::discriminant(&Datatype::Any) ||
// core::mem::discriminant(other) == core::mem::discriminant(&Datatype::Any) ||
// core::mem::discriminant(self) == core::mem::discriminant(other)
// }
// }
/// A user-defined function.
#[derive(Debug)]
struct Function
{
// Name used to look the function up at call sites and as the assembly label.
name: String,
// Types taken from the head of the caller's queue when the function is called.
ins: Vec<Datatype>,
// Types the function's queue must hold when its body ends; appended to the caller's queue.
outs: Vec<Datatype>,
// The operations making up the function body.
content: Vec<Operation>,
}
/// A named array declared by the program.
#[derive(Debug)]
struct Arr
{
// Name used by `Operation::Apply` and for the `arr_<name>` assembly label.
name: String,
datatype: Datatype,
// Number of elements; used for bounds checks and for the `rq` reservation in the assembly output.
length: i64,
// Element values as used by the interpreter (indexed by position).
data: Vec<String>,
}
/// A single executable operation of the queue language.
///
/// The two trailing `i32` fields of every variant are the source line and column.
#[derive(Debug)]
enum Operation
{
// Appends a literal of the given type (value kept as text) to the tail of the queue.
Enqueue(Datatype, String, i32, i32),
// Removes the head of the queue.
Dequeue(i32, i32),
// TODO: req can be implemented in terms of dup and dequeue
// Moves the head of the queue to the tail.
Requeue(i32, i32),
// Removes the first two elements and appends them in reversed order.
Swap(i32, i32),
// Appends a copy of the head to the tail without removing the head.
Dup(i32, i32),
// Invokes the built-in word with the given name.
Intrinsic(String, i32, i32),
// Calls the user-defined function with the given name.
FunctionCall(String, i32, i32),
// Consumes a Bool from the head; runs the first block if true, otherwise the optional else block.
If(Vec<Operation>, Option<Vec<Operation>>, i32, i32),
// Consumes a Bool from the head before each iteration and runs the block while it is true.
While(Vec<Operation>, i32, i32),
// Applies a word ("read", "write" or "length") to the array with the given name: (array, word).
Apply(String, String, i32, i32),
// Appends the current queue length to the queue (interpreter only so far).
Depth(i32, i32),
// Prints the current queue state (interpreter only so far).
QueueDiagnostic(i32, i32),
// Emits an `int3` breakpoint in generated code; a no-op in the interpreter.
Interrupt(i32, i32),
}
fn main()
{
let intrinsics: HashMap<&str, (Vec<Datatype>, Vec<Datatype>)> = HashMap::from(
[
("print", (Vec::from([Datatype::String]), Vec::new())),
("println", (Vec::from([Datatype::String]), Vec::new())),
("intToStr", (Vec::from([Datatype::Int]), Vec::from([Datatype::String]))),
("-", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
("+", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
("*", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Int]))),
("<", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
(">", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
(">=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("<=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("==", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("!=", (Vec::from([Datatype::Int, Datatype::Int]), Vec::from([Datatype::Bool]))),
("&&", (Vec::from([Datatype::Bool, Datatype::Bool]), Vec::from([Datatype::Bool]))),
("decrease", (Vec::from([Datatype::Int]), Vec::from([Datatype::Int]))),
]);
let args: Vec<String> = env::args().collect();
if args.len() < 2
{
usage()
}
let mut debug = false;
let mut interpret = false;
let mut run = false;
for arg in &args[3..]
{
match arg.as_str()
{
"-d" | "--debug" => debug = true,
"-i" | "--interpret" => interpret = true,
"-r" | "--run" => run = true,
_ => panic!("Unknown option {}", arg),
}
}
match args[1].as_str()
{
"-t" | "--test" =>
{
let mut count = 0;
for f in fs::read_dir(&args[2]).unwrap()
{
let f = f.unwrap();
let file_content = fs::read_to_string(f.path()).unwrap().replace("\r\n", "\n");
println!("========NOW TESTING {:?}========", f.path());
match compile(&file_content, &intrinsics, interpret, run, debug)
{
Ok(maybe_msg) =>
{
println!("---Successfully parsed {:?}---", f.path());
if let Some(msg) = &maybe_msg
{
print!("---Output---\n'{}'\n", msg);
}
let expected = &format!("//valid,{}:END:", maybe_msg.unwrap_or(String::new()).replace("\n", "\n//"));
if file_content.starts_with(expected)
{
println!("===PASSED===");
count += 1;
}
else if let Some(index) = file_content.find(":END:")
{
let expected_output = file_content[8..index].replace("\n//", "\n");
println!("\n===FAILED===\nExpected the output to be\n'{}'\n({})", expected_output, expected);
}
else
{
panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name());
}
}
Err(msg) =>
{
println!("ERROR: {}", msg);
if file_content.starts_with(&format!("//invalid,{}:END:", msg.replace("\n", "\n//")))
{
println!("===PASSED===");
count += 1;
}
else if file_content.starts_with("//invalid,")
{
if let Some(index) = file_content.find(":END:")
{
let expected = &format!("//invalid,{}:END:", msg.replace("\n", "\n//"));
let expected_output = file_content[10..index].replace("\n//", "\n");
println!("\n===FAILED===\nExpected the output to be\n'{}'\n({})", expected_output, expected);
}
else
{
panic!("Could not find an ending marker (:END:) for the expected output in {:?}", f.file_name());
}
}
else
{
println!("Unexpected error");
}
}
}
}
println!("\n\n=========RESULT=========\n{}/{}", count, fs::read_dir(&args[2]).unwrap().count());
}
"-c" | "--compile" =>
{
let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
match compile(&file_content, &intrinsics, interpret, run, debug)
{
Ok(maybe_msg) =>
{
if let Some(msg) = maybe_msg
{
print!("---Output---\n\n{}", msg);
}
}
Err(msg) => println!("ERROR: {}", msg),
}
}
_ => panic!("Unknown option {}", args[1])
}
}
/// Runs the whole pipeline on one source text: tokenize, extract functions and
/// arrays, parse, validate, typecheck, then either interpret the program or
/// generate `out.asm` and assemble it with `fasm`.
///
/// * `interpret` - interpret instead of generating and assembling code.
/// * `run` - afterwards execute `./out` and return its combined stdout+stderr
///   (`Ok` on exit code 0, `Err` otherwise).
/// * `debug` - forwarded to every stage.
///
/// Returns the interpreter output (if interpreting), the program output (if
/// `run`), or `None`; any stage failure is returned as `Err` with its message.
fn compile(file_content: &String, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, interpret: bool, run: bool, debug: bool) -> Result<Option<String>, String>
{
    let mut tokens: Vec<Token> = tokenize(&file_content)?;
    println!("---Done tokenizing, got {} tokens---", tokens.len());
    let functions: Vec<Function> = extract_functions(&mut tokens, &intrinsics, debug)?;
    println!("---Done extracting functions, got {} functions and reduced the token count to {}---", functions.len(), tokens.len());
    let mut arrays: Vec<Arr> = extract_arrays(&mut tokens, &intrinsics, &functions, debug)?;
    println!("---Done extracting arrays, got {} arrays and reduced the token count to {}---", arrays.len(), tokens.len());
    let operations = parse_until_delimiter(&mut tokens.iter().peekable(), &intrinsics, None, debug)?;
    println!("---Done parsing tokens into {} operations---", operations.len());
    validate_function_calls(&operations, &functions, &arrays, debug)?;
    println!("---Done validating function calls---");
    typecheck(&operations, &functions, intrinsics, &arrays, debug)?;
    println!("---Done typechecking---");

    let start = Instant::now();
    let output = if interpret
    {
        println!("---Starting to interpret the program---");
        let mut queue = Vec::new();
        Some(interpret_program(&operations, &mut queue, &functions, &mut arrays, intrinsics, debug)?)
    }
    else
    {
        // Emit out.asm, then assemble it and wait for the assembler to finish.
        generate_assembly_linux_x64(&operations, &functions, intrinsics, &arrays, debug)
            .map_err(|err| err.to_string())?;
        let mut fasm_process = Command::new("fasm")
            .arg("out.asm")
            .spawn()
            .map_err(|err| format!("Fasm process error: {}", err.to_string()))?;
        let status = fasm_process.wait().map_err(|err| err.to_string())?;
        if !status.success()
        {
            return Err(format!("fasm exited with an error: {}", status));
        }
        None
    };

    if run
    {
        let process = Command::new("./out")
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .map_err(|err| err.to_string())?;
        let finished = process.wait_with_output().map_err(|err| err.to_string())?;
        let stdout = String::from_utf8(finished.stdout).map_err(|err| err.to_string())?;
        let stderr = String::from_utf8(finished.stderr).map_err(|err| err.to_string())?;
        let text = format!("{}{}", stdout, stderr);
        // Only a clean exit counts as success; signals and non-zero codes report the captured text as the error.
        return match finished.status.code()
        {
            Some(0) => Ok(Some(text)),
            _ => Err(text),
        };
    }
    println!("---Done after {:?}---", start.elapsed());
    Ok(output)
}
/// Assembly text collected while generating code, split by output segment.
/// The final file is written as header + `code` + `arrays` + `strings`.
struct AssemblyData
{
// Read-only data segment: string literals and runtime messages.
strings: String,
// Executable segment: instructions and labels.
code: String,
// Writeable data segment: the queue, the dynamic buffer and program arrays.
arrays: String,
}
fn merge_assemblies(data: &mut AssemblyData, data2: AssemblyData)
{
data.arrays += data2.arrays.as_str();
data.code += data2.code.as_str();
data.strings += data2.strings.as_str();
}
// Number of 8-byte slots reserved for the queue (`queue: rq N`).
const ASSEMBLY_LINUX_X64_QUEUE_LENGTH: u32 = 1024;
// fasm format directive placed at the top of the generated file.
const ASSEMBLY_LINUX_X64_HEADER: &str = "format ELF64 executable 3\n";
// When head == tail (queue empty) both pointers are moved back to the base.
const ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE: &str = "\tcmp r12, r13\n\tcmove r12, r14\n\tcmove r13, r14\n";
// Linux x86-64 `exit` syscall (number 60) with status 0.
const ASSEMBLY_LINUX_X64_EXIT: &str = "\tmov rax, 60\n\tmov rdi, 0\n\tsyscall\n";
// Number of bytes reserved for runtime-created data (`dynamic: rb N`), e.g. strings built by intToStr.
const ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH: u32 = 16384;
// Register conventions of the generated code (queue registers hold slot indices, used as [queue+8*reg]):
// r12: head of the queue (next element to dequeue)
// r13: tail of the queue (next free slot)
// r14: base of the current queue
// r15: end of the used part of `dynamic` (byte offset, used as [dynamic+r15])
/// Generates the complete fasm source for the program and writes it to `out.asm`.
///
/// Layout of the file: header, code segment (`_start` with the main
/// operations, an exit syscall, every user function, then the runtime
/// routines that are actually referenced), writeable segment (queue, dynamic
/// buffer, arrays) and read-only segment (strings).
///
/// Returns the I/O error of the final write, if any.
fn generate_assembly_linux_x64(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), std::io::Error>
{
    // Runtime routine converting the integer in rdi into a length-prefixed string
    // inside `dynamic`; the address of the result is returned in rax.
    const INT_TO_STR_ROUTINE: &str = concat!(
        "intToStr:\n",
        "\tmov rax, rdi\n",
        "\tmov rsi, 10\n",
        "\txor rdi, rdi\n",
        "\txor rdx, rdx\n",
        "\tintToStringLoop:\n",
        "\t\tdiv rsi\n",
        "\t\tadd rdx, 48\n",
        "\t\tpush rdx\n",
        "\t\txor rdx, rdx\n",
        "\t\tinc rdi\n",
        "\t\tcmp rax, 0\n",
        "\t\tjne intToStringLoop\n",
        "\tmov rsi, r15\n",
        "\tmov qword [dynamic+r15], rdi\n",
        "\tadd r15, 8\n",
        "\tintToStringBuildLoop:\n",
        "\t\tcmp rdi, 0\n",
        "\t\tje intToStringBuildLoopEnd\n",
        "\t\tpop rax\n",
        "\t\tmov byte [dynamic+r15], byte al\n",
        "\t\tinc r15\n",
        "\t\tdec rdi\n",
        "\t\tjmp intToStringBuildLoop\n",
        "\tintToStringBuildLoopEnd:\n",
        "\tmov byte [dynamic+r15], 0\n",
        "\tinc r15\n",
        "\tlea rax, [dynamic+rsi]\n",
        "\tret\n",
    );
    const ARRAY_OOB_MESSAGE: &str = "\texception_array_oob_msg db \"Attempted array out-of-bounds access\", 10\n";
    // Writes the message (37 bytes including the newline) to stdout and exits with status -1.
    //TODO: report the passed sizes
    const ARRAY_OOB_ROUTINE: &str = concat!(
        "exception_array_read_out_of_bounds:\n",
        "\tmov rax, 1\n",
        "\tmov rdi, 1\n",
        "\tmov rdx, 37\n",
        "\tmov rsi, exception_array_oob_msg\n",
        "\tsyscall\n",
        "\tmov rax, 60\n",
        "\tmov rdi, -1\n",
        "\tsyscall\n",
    );

    let mut assembly = AssemblyData
    {
        arrays: format!("segment readable writeable\n\tqueue: rq {}\n\tdynamic: rb {}\n", ASSEMBLY_LINUX_X64_QUEUE_LENGTH, ASSEMBLY_LINUX_X64_DYNAMIC_DATA_LENGTH),
        strings: String::from("segment readable\n\tnewline: db 10\n"),
        code: String::from("segment executable\n"),
    };
    for array in arrays
    {
        assembly.arrays.push_str(&format!("\tarr_{}: rq {}\n", array.name, array.length));
    }

    assembly.code.push_str("_start:\n");
    let main_block = generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug);
    merge_assemblies(&mut assembly, main_block);
    assembly.code.push_str(ASSEMBLY_LINUX_X64_EXIT);
    for function in functions
    {
        let function_assembly = generate_assembly_linux_x64_function(function.name.as_str(), &function.content, functions, intrinsics, arrays, debug);
        merge_assemblies(&mut assembly, function_assembly);
    }

    // Runtime routines are only emitted when the generated code references them.
    if assembly.code.contains("call intToStr")
    {
        assembly.code.push_str(INT_TO_STR_ROUTINE);
    }
    if assembly.code.contains("exception_array_read_out_of_bounds")
    {
        assembly.strings.push_str(ARRAY_OOB_MESSAGE);
        assembly.code.push_str(ARRAY_OOB_ROUTINE);
    }

    let file_text = [
        ASSEMBLY_LINUX_X64_HEADER,
        assembly.code.as_str(),
        assembly.arrays.as_str(),
        assembly.strings.as_str(),
    ].concat();
    fs::write("out.asm", file_text)
}
/// Returns the assembly that validates the array index held in `rax`:
/// jumps to `exception_array_read_out_of_bounds` when `rax >= length` or `rax < 0`.
fn generate_assembly_linux_x64_array_oob_check(length: i64) -> String
{
    format!(
        concat!(
            "\t\t;;Array bounds check\n",
            "\tcmp qword rax, {}\n",
            "\tjge exception_array_read_out_of_bounds\n",
            "\tcmp qword rax, 0\n",
            "\tjl exception_array_read_out_of_bounds\n",
            "\t\t;;Array bounds check over\n",
        ),
        length
    )
}
fn generate_assembly_linux_x64_block(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> AssemblyData
{
let mut data = AssemblyData
{
arrays: String::new(),
code: String::new(),
strings: String::new(),
};
for operation in operations
{
match operation
{
Operation::Dequeue(line, col) =>
{
data.code += format!("\t;;deq {}:{}\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::Enqueue(datatype, value, line, col) =>
{
data.code += format!("\t;;enq {:?} {} {}:{}\n", datatype, value, line, col).as_str();
match datatype
{
Datatype::Int =>
{
data.code += format!("\tmov qword [queue+8*r13], {}\n", value).as_str();
}
Datatype::Bool =>
{
data.code += format!("\tmov qword [queue+8*r13], {}\n", if value == "true" { 1 } else { 0 }).as_str();
}
Datatype::String =>
{
data.strings += format!("\tstr_{}_{}: db {}, {}, {}, {}, {}, {}, {}, {}, \"{}\", 0\n",
line, col,
value.len() % 256,
(value.len() >> 8) % 256,
(value.len() >> 16) % 256,
(value.len() >> 24) % 256,
(value.len() >> 32) % 256,
(value.len() >> 40) % 256,
(value.len() >> 48) % 256,
(value.len() >> 56) % 256,
value).as_str();
data.code += format!("\tlea rax, [str_{}_{}]\n", line, col).as_str();
data.code += "\tmov [queue+8*r13], rax\n";
}
}
data.code += "\tinc r13\n";
}
Operation::Requeue(line, col) =>
{
data.code += format!("\t;;req {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
Operation::Swap(line, col) =>
{
data.code += format!("\t;;swp {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov rbx, [queue+8*r12+8]\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov [queue+8*r13], rbx\n";
data.code += "\tmov [queue+8*r13+8], rax\n";
data.code += "\tadd r13, 2\n";
}
Operation::While(while_operations, line, col) =>
{
data.code += format!("\t;;while {}:{}\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tje while_{}_{}_end\n", line, col).as_str();
data.code += format!("while_{}_{}:\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(while_operations, functions, intrinsics, arrays, debug));
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tjne while_{}_{}\n", line, col).as_str();
data.code += format!("while_{}_{}_end:\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
Operation::If(if_operations, maybe_else_operations, line, col) =>
{
data.code += format!("\t;;if {}:{}\n", line, col).as_str();
data.code += "\tcmp qword [queue+8*r12], 0\n";
data.code += format!("\tje else_{}_{}\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
merge_assemblies(&mut data, generate_assembly_linux_x64_block(if_operations, functions, intrinsics, arrays, debug));
data.code += format!("\tjmp if_{}_{}_end\n", line, col).as_str();
data.code += format!("else_{}_{}:\n", line, col).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
if let Some(else_operations) = maybe_else_operations
{
merge_assemblies(&mut data, generate_assembly_linux_x64_block(else_operations, functions, intrinsics, arrays, debug));
}
data.code += format!("if_{}_{}_end:\n", line, col).as_str();
}
Operation::Dup(line, col) =>
{
data.code += format!("\t;;dup {}:{}\n", line, col).as_str();
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
Operation::Intrinsic(name, line, col) =>
{
data.code += format!("\t;;intrinsic {} {}:{}\n", name, line, col).as_str();
match name.as_str()
{
"print" =>
{
// For now printing numbers directly is unsupported
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r12]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
data.code += "\tlea rsi, [rsi+8]\n";
// incorporate the null byte
//data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
"println" =>
{
// For now printing numbers directly is unsupported
data.code += "\tmov rax, 1\n";
data.code += "\tmov rdi, 1\n";
// load address
data.code += "\tmov rsi, [queue+8*r12]\n";
// size
data.code += "\tmov rdx, [rsi]\n";
// data
data.code += "\tlea rsi, [rsi+8]\n";
// incorporate the null byte
//data.code += "\tinc rdx\n";
data.code += "\tsyscall\n";
// TODO: factor this out
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
// TODO: Don't syscall twice
data.code += "\tmov rax, 1\n";
data.code += "\tlea rsi, [newline]\n";
data.code += "\tmov rdx, 1\n";
data.code += "\tsyscall\n";
}
"intToStr" =>
{
data.code += "\tmov qword rdi, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tcall intToStr\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
"-" =>
{
data.code += "\tmov qword rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += "\tmov qword rbx, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tsub rax, rbx\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
"+" =>
{
data.code += "\tmov qword rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += "\tmov qword rbx, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tadd rax, rbx\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
"*" =>
{
data.code += "\tmov qword rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += "\tmov qword rbx, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmul rbx\n";
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
">" =>
{
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovg rbx, rcx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
"<" =>
{
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovl rbx, rcx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
">=" =>
{
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovge rbx, rcx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
"<=" =>
{
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmovle rbx, rcx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
"==" =>
{
data.code += "\tmov rbx, 0\n";
data.code += "\tmov rcx, 1\n";
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tcmp qword rax, [queue+8*r12+8]\n";
data.code += "\tcmove rbx, rcx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
"&&" =>
{
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov rbx, [queue+8*r12+8]\n";
data.code += "\tand rax, rbx\n";
data.code += "\tadd r12, 2\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
_ => todo!("intrinsic {} {}:{}", name, line, col)
}
}
Operation::Apply(name, word, line, col) =>
{
let array = arrays.iter().find(|x| &x.name == name).unwrap();
data.code += format!("\t;;apply {}.{} {}:{}\n", name, word, line, col).as_str();
match word.as_str()
{
"read" =>
{
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += generate_assembly_linux_x64_array_oob_check(array.length).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += format!("\tmov qword rbx, [arr_{}+8*rax]\n", name).as_str();
data.code += "\tmov qword [queue+8*r13], rbx\n";
data.code += "\tinc r13\n";
}
"write" =>
{
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += generate_assembly_linux_x64_array_oob_check(array.length).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov qword rbx, [queue+8*r12]\n";
data.code += format!("\tmov qword [arr_{}+8*rax], rbx\n", name).as_str();
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
}
"length" =>
{
data.code += format!("\tmov qword [queue+8*r13], {}\n", array.length).as_str();
data.code += "\tinc r13\n";
}
_ => todo!("apply {}", word)
}
}
Operation::FunctionCall(name, line, col) =>
{
data.code += format!("\t;;func call {} {}:{}\n", name, line, col).as_str();
let function = functions.iter().find(|x| &x.name == name).unwrap();
for _ in 0..function.ins.len()
{
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tinc r12\n";
data.code += ASSEMBLY_LINUX_X64_TRY_RESET_QUEUE;
data.code += "\tmov [queue+8*r13], rax\n";
data.code += "\tinc r13\n";
}
data.code += "\t;; move pointers\n";
// save the current base
data.code += "\tpush r14\n";
// save the current head
data.code += "\tpush r12\n";
// prepare the layout
data.code += "\tmov r14, r13\n";
data.code += format!("\tsub r14, {}\n", function.ins.len()).as_str();
data.code += "\tmov r12, r14\n";
// call
data.code += format!("\tcall {}\n", name).as_str();
// move the sub-queue back to the base
for _ in 0..function.outs.len()
{
data.code += "\tmov rax, [queue+8*r12]\n";
data.code += "\tmov [queue+8*r14], rax\n";
data.code += "\tinc r12\n";
data.code += "\tinc r14\n";
}
// restore the tail
data.code += "\tmov r13, r14\n";
// restore the head
data.code += "\tpop r12\n";
// restore the base
data.code += "\tpop r14\n";
}
Operation::Interrupt(line, col) =>
{
data.code += format!("\t;;interrupt {}:{}\n", line, col).as_str();
data.code += "lea r8, [queue]\n";
data.code += format!("mov r9, {}\n", 1000*line + col).as_str();
data.code += "int3\n";
}
_ => todo!("{:?}", operation)
}
}
return data;
}
/// Generates the assembly of one user function: a `<name>:` label, the body
/// and a closing `ret`.
fn generate_assembly_linux_x64_function(name: &str, operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> AssemblyData
{
    let mut body = generate_assembly_linux_x64_block(operations, functions, intrinsics, arrays, debug);
    // Wrap the generated body between the label and the return instruction.
    body.code = format!("{}:\n{}\tret\n", name, body.code);
    body
}
/// Interprets `operations` on `queue` and returns everything the program printed.
///
/// Queue values are kept as text (`"42"`, `"true"`, string contents). Function
/// calls run on a fresh queue holding the arguments; the callee's remaining
/// queue is appended to the caller's queue afterwards.
///
/// Integer arithmetic wraps on overflow, matching the generated assembly.
///
/// # Errors
/// Returns `Err` for out-of-bounds array accesses and for unknown intrinsics
/// or array words.
///
/// # Panics
/// Panics when the queue is empty or holds a value of the wrong kind, or when a
/// function/array does not exist; type checking and validation are expected to
/// rule these out before interpretation.
fn interpret_program(operations: &Vec<Operation>, queue: &mut Vec<String>, functions: &Vec<Function>, arrays: &mut Vec<Arr>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<String,String>
{
    /// Dequeues the head and parses it as an integer.
    fn pop_int(queue: &mut Vec<String>) -> i64
    {
        queue.remove(0).parse::<i64>().unwrap()
    }
    /// Dequeues the head and parses it as a boolean.
    fn pop_bool(queue: &mut Vec<String>) -> bool
    {
        queue.remove(0).parse::<bool>().unwrap()
    }
    /// Dequeues two integers (first, second) and enqueues `op(first, second)`.
    fn int_binary(queue: &mut Vec<String>, op: impl FnOnce(i64, i64) -> String)
    {
        let first = pop_int(queue);
        let second = pop_int(queue);
        queue.push(op(first, second));
    }
    /// Dequeues an array index and validates it against the array length.
    fn pop_index(queue: &mut Vec<String>, arr: &Arr) -> Result<usize, String>
    {
        let position = pop_int(queue);
        if position < 0 || position >= arr.length
        {
            return Err(String::from("Attempted array out-of-bounds access"));
        }
        Ok(position as usize)
    }

    let mut output = String::new();
    for operation in operations
    {
        if debug
        {
            println!("before: {:?}: {:?}, '{}'", operation, queue, output);
        }
        match operation
        {
            Operation::Dequeue(_, _) =>
            {
                queue.remove(0);
            }
            Operation::Enqueue(_, value, _, _) =>
            {
                queue.push(value.clone());
            }
            Operation::Requeue(_, _) =>
            {
                let val = queue.remove(0);
                queue.push(val);
            }
            Operation::Dup(_, _) =>
            {
                let val = queue.first().unwrap().clone();
                queue.push(val);
            }
            Operation::Swap(_, _) =>
            {
                let first = queue.remove(0);
                let second = queue.remove(0);
                queue.push(second);
                queue.push(first);
            }
            Operation::FunctionCall(function_name, _, _) =>
            {
                let function = functions.iter().find(|x| &x.name == function_name).unwrap();
                // The callee works on its own queue consisting of the arguments only.
                let mut function_context: Vec<String> = queue.drain(..function.ins.len()).collect();
                output.push_str(&interpret_program(&function.content, &mut function_context, functions, arrays, intrinsics, debug)?);
                queue.append(&mut function_context);
            }
            Operation::If(if_block, maybe_else_block, _, _) =>
            {
                let val = queue.remove(0);
                if val == "true"
                {
                    output.push_str(&interpret_program(if_block, queue, functions, arrays, intrinsics, debug)?);
                }
                else if let Some(else_block) = maybe_else_block
                {
                    output.push_str(&interpret_program(else_block, queue, functions, arrays, intrinsics, debug)?);
                }
            }
            Operation::Intrinsic(intrinsic_name, line, col) =>
            {
                match intrinsic_name.as_str()
                {
                    "print" =>
                    {
                        let text = queue.remove(0);
                        output.push_str(&text);
                    }
                    "println" =>
                    {
                        let text = queue.remove(0);
                        output.push_str(&text);
                        output.push('\n');
                    }
                    // Wrapping arithmetic: the compiled program wraps as well, and an
                    // overflow must not abort the interpreter in debug builds.
                    "-" => int_binary(queue, |minuend, subtrahend| minuend.wrapping_sub(subtrahend).to_string()),
                    "+" => int_binary(queue, |first, second| first.wrapping_add(second).to_string()),
                    "*" => int_binary(queue, |first, second| first.wrapping_mul(second).to_string()),
                    ">" => int_binary(queue, |first, second| (first > second).to_string()),
                    "<" => int_binary(queue, |first, second| (first < second).to_string()),
                    ">=" => int_binary(queue, |first, second| (first >= second).to_string()),
                    "<=" => int_binary(queue, |first, second| (first <= second).to_string()),
                    "==" => int_binary(queue, |first, second| (first == second).to_string()),
                    "!=" => int_binary(queue, |first, second| (first != second).to_string()),
                    "&&" =>
                    {
                        let first = pop_bool(queue);
                        let second = pop_bool(queue);
                        queue.push((first && second).to_string());
                    }
                    "decrease" =>
                    {
                        let val = pop_int(queue);
                        queue.push(val.wrapping_sub(1).to_string());
                    }
                    "intToStr" =>
                    {
                        // Values are already stored as text, so the conversion is a plain requeue.
                        let val = queue.remove(0);
                        queue.push(val);
                    }
                    _ =>
                    {
                        return Err(format!("Unexpected intrinsic '{}' at {}:{}", intrinsic_name, line, col));
                    }
                }
            }
            Operation::Apply(name, word, line, col) =>
            {
                let arr: &mut Arr = arrays.iter_mut().find(|x| &x.name == name).unwrap();
                match word.as_str()
                {
                    "write" =>
                    {
                        // Queue order: index first, then the value to store.
                        let position = pop_index(queue, arr)?;
                        let data = queue.remove(0);
                        if debug
                        {
                            println!("write before: {} {} {:?}", position, data, arr);
                        }
                        arr.data[position] = data;
                        if debug
                        {
                            println!("write after: {:?}", arr);
                        }
                    }
                    "read" =>
                    {
                        let position = pop_index(queue, arr)?;
                        queue.push(arr.data[position].clone());
                    }
                    "length" =>
                    {
                        queue.push(arr.length.to_string());
                    }
                    _ => return Err(format!("Unexpected application '{}' at {}:{}", word, line, col))
                }
            }
            Operation::While(while_block, _, _) =>
            {
                // The condition value is consumed before every iteration, including the last check.
                loop
                {
                    let val = queue.remove(0);
                    if val == "false"
                    {
                        break;
                    }
                    output.push_str(&interpret_program(while_block, queue, functions, arrays, intrinsics, debug)?);
                }
            }
            Operation::Depth(_, _) =>
            {
                let depth = queue.len();
                queue.push(depth.to_string());
            }
            Operation::QueueDiagnostic(line, col) =>
            {
                println!("---Queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, queue.len(), queue);
            }
            Operation::Interrupt(_, _) => {}
        }
        if debug
        {
            println!("after: {:?}: {:?}, '{}'", operation, queue, output);
        }
    }
    Ok(output)
}
/// Type checks every function against its declared signature and then the main
/// operations, which must start and end with an empty queue.
///
/// Returns the first type error encountered.
fn typecheck(operations: &Vec<Operation>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
{
    // Progress messages are only printed in debug mode.
    let trace = |message: std::fmt::Arguments|
    {
        if debug
        {
            println!("{}", message);
        }
    };
    for function in functions
    {
        trace(format_args!("Now typechecking function '{}'", function.name));
        typecheck_block(&function.content, &function.ins, &function.outs, functions, intrinsics, arrays, debug)?;
        trace(format_args!("Successfully typechecked function '{}'", function.name));
    }
    trace(format_args!("Now typechecking main operations"));
    let empty_queue: Vec<Datatype> = Vec::new();
    typecheck_block(operations, &empty_queue, &empty_queue, functions, intrinsics, arrays, debug)?;
    trace(format_args!("Successfully typechecked main operations"));
    Ok(())
}
/// Checks that running `operations` on a queue of types `ins` leaves exactly `outs`.
///
/// On a mismatch the error reports the position of the last operation of the
/// block, or `-1:-1` when the block is empty.
fn typecheck_block(operations: &Vec<Operation>, ins: &Vec<Datatype>, outs: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
{
    let actual_outs = get_return_type(operations, ins, functions, intrinsics, arrays, debug)?;
    if &actual_outs == outs
    {
        return Ok(());
    }
    let (line, col) = operations.last().map_or((-1, -1), |last_operation|
    {
        match last_operation
        {
            Operation::Enqueue(_, _, line, col) |
            Operation::Dequeue(line, col) |
            Operation::Requeue(line, col) |
            Operation::Swap(line, col) |
            Operation::Dup(line, col) |
            Operation::Intrinsic(_, line, col) |
            Operation::FunctionCall(_, line, col) |
            Operation::If(_, _, line, col) |
            Operation::While(_, line, col) |
            Operation::Apply(_, _, line, col) |
            Operation::Depth(line, col) |
            Operation::QueueDiagnostic(line, col) |
            Operation::Interrupt(line, col) => (*line, *col),
        }
    });
    Err(format!("Wrong queue state at the end of a block, expected {:?} but got {:?} at {}:{}", outs, actual_outs, line, col))
}
/// Simulates `operations` on a queue of datatypes and returns the resulting queue.
///
/// The queue starts as a copy of `ins`. Operations consume elements from the front
/// of the queue and append their results to the back.
///
/// * `functions` / `intrinsics` supply the input and output signatures of callees.
/// * `arrays` supplies the element type for `read` and `write` applications.
/// * `debug` prints the queue before and after every operation.
///
/// # Errors
///
/// Returns a message when an operation finds too few elements or elements of the
/// wrong type, when the branches of an `if` leave different queues behind, when a
/// `while` body fails `typecheck_block`, or when a function, intrinsic, array or
/// application word is unknown.
fn get_return_type(operations: &Vec<Operation>, ins: &Vec<Datatype>, functions: &Vec<Function>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, arrays: &Vec<Arr>, debug: bool) -> Result<Vec<Datatype>, String>
{
    // Removes `expected.len()` elements from the front of the queue and checks that
    // they match `expected`. `callee_kind` is "function" or "intrinsic" and only
    // influences the wording of the error message.
    fn consume_arguments(type_queue: &mut Vec<Datatype>, expected: &[Datatype], callee_kind: &str, callee_name: &str, line: i32, col: i32) -> Result<(), String>
    {
        if expected.len() > type_queue.len()
        {
            return Err(format!("Attempted to call {} '{}' at {}:{}, with insufficient elements in the queue, expected {:?} but got {:?}", callee_kind, callee_name, line, col, expected, type_queue));
        }
        for (in_type, actual_type) in expected.iter().zip(type_queue.drain(..expected.len()))
        {
            if *in_type != actual_type
            {
                return Err(format!("Attempted to call {} '{}' at {}:{} with a wrong parameter, expected {:?} but got {:?}", callee_kind, callee_name, line, col, in_type, actual_type));
            }
        }
        Ok(())
    }

    // Looks up the element type of the array called `name`, reporting an error
    // instead of panicking when no such array was declared.
    fn array_datatype(arrays: &[Arr], name: &str, line: i32, col: i32) -> Result<Datatype, String>
    {
        arrays
            .iter()
            .find(|x| x.name == name)
            .map(|x| x.datatype)
            .ok_or_else(|| format!("Unknown array '{}' at {}:{}", name, line, col))
    }

    let mut queue_storage: Vec<Datatype> = ins.clone();
    // Kept as a `&mut` binding so it can be handed to `typecheck_block` unchanged.
    let type_queue: &mut Vec<Datatype> = &mut queue_storage;
    for operation in operations
    {
        let debug_string = if debug
        {
            Some(format!("operation: {:?}: {:?}", operation, type_queue))
        }
        else
        {
            None
        };
        match operation
        {
            Operation::Dequeue(line, col) =>
            {
                if type_queue.is_empty()
                {
                    return Err(format!("Attempted to dequeue an element while the queue was empty at {}:{}", line, col));
                }
                type_queue.remove(0);
            }
            Operation::Enqueue(datatype, _, _, _) =>
            {
                type_queue.push(*datatype);
            }
            Operation::Dup(line, col) =>
            {
                match type_queue.first().copied()
                {
                    Some(typ) => type_queue.push(typ),
                    None => return Err(format!("Attempted to dup an element while the queue was empty at {}:{}", line, col)),
                }
            }
            Operation::Requeue(line, col) =>
            {
                if type_queue.is_empty()
                {
                    return Err(format!("Attempted to requeue an element while the queue was empty at {}:{}", line, col));
                }
                let typ = type_queue.remove(0);
                type_queue.push(typ);
            }
            Operation::Swap(line, col) =>
            {
                match type_queue.len()
                {
                    0 => return Err(format!("Attempted to get the first element for a swap while the queue was empty at {}:{}", line, col)),
                    1 => return Err(format!("Attempted to get the second element for a swap while the queue was empty at {}:{}", line, col)),
                    _ =>
                    {
                        let first_typ = type_queue.remove(0);
                        let second_typ = type_queue.remove(0);
                        type_queue.push(second_typ);
                        type_queue.push(first_typ);
                    }
                }
            }
            Operation::FunctionCall(function_name, line, col) =>
            {
                let function = functions
                    .iter()
                    .find(|x| &x.name == function_name)
                    .ok_or_else(|| format!("Call to unknown function '{}' at {}:{}", function_name, line, col))?;
                consume_arguments(type_queue, &function.ins, "function", &function.name, *line, *col)?;
                type_queue.extend_from_slice(&function.outs);
            }
            Operation::If(if_block, maybe_else_block, line, col) =>
            {
                if type_queue.is_empty()
                {
                    return Err(format!("Encountered if block with an empty queue at {}:{}", line, col));
                }
                let comparison_type = type_queue.remove(0);
                if comparison_type != Datatype::Bool
                {
                    return Err(format!("Expected a Bool as an if condition but got {:?} instead at {}:{}", comparison_type, line, col));
                }
                if debug
                {
                    println!("Starting to typecheck if block");
                }
                let if_ret = get_return_type(if_block, &*type_queue, functions, intrinsics, arrays, debug)?;
                // Without an else block the "else" result is the untouched queue.
                let else_ret = match maybe_else_block
                {
                    Some(else_block) =>
                    {
                        if debug
                        {
                            println!("Starting to typecheck else block");
                        }
                        get_return_type(else_block, &*type_queue, functions, intrinsics, arrays, debug)?
                    }
                    None => type_queue.clone(),
                };
                if if_ret != else_ret
                {
                    return Err(format!("Incompatible queue states after if/else construction, expected {:?} but got {:?}", if_ret, else_ret));
                }
                *type_queue = if_ret;
            }
            Operation::Intrinsic(intrinsic_name, line, col) =>
            {
                let io = intrinsics
                    .get(intrinsic_name.as_str())
                    .ok_or_else(|| format!("Call to unknown intrinsic '{}' at {}:{}", intrinsic_name, line, col))?;
                consume_arguments(type_queue, &io.0, "intrinsic", intrinsic_name, *line, *col)?;
                type_queue.extend_from_slice(&io.1);
            }
            Operation::While(while_block, line, col) =>
            {
                if type_queue.is_empty()
                {
                    return Err(format!("Encountered while block with an empty queue at {}:{}", line, col));
                }
                let comparison_type = type_queue.remove(0);
                if comparison_type != Datatype::Bool
                {
                    return Err(format!("Expected a Bool as a while condition but got {:?} instead at {}:{}", comparison_type, line, col));
                }
                if debug
                {
                    println!("Starting to typecheck while block");
                }
                // The body must leave the current queue with a Bool in front,
                // which serves as the condition for the next iteration.
                let mut outs = type_queue.clone();
                outs.insert(0, Datatype::Bool);
                typecheck_block(while_block, type_queue, &outs, functions, intrinsics, arrays, debug)?;
            }
            Operation::Depth(_, _) =>
            {
                type_queue.push(Datatype::Int);
            }
            Operation::QueueDiagnostic(line, col) =>
            {
                println!("---Type queue state at {}:{}---\nlength: {}\n{:?}\n------------------------------", line, col, type_queue.len(), type_queue);
            }
            Operation::Interrupt(_, _) => {}
            Operation::Apply(name, word, line, col) =>
            {
                match word.as_str()
                {
                    "write" =>
                    {
                        if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int
                        {
                            return Err(format!("Expected a position for a write application at {}:{}", line, col));
                        }
                        let expected_type = array_datatype(arrays, name, *line, *col)?;
                        if type_queue.is_empty()
                        {
                            return Err(format!("Expected data for a write application at {}:{}", line, col));
                        }
                        let actual_type = type_queue.remove(0);
                        if actual_type != expected_type
                        {
                            return Err(format!("Expected a {:?} value but got a {:?} value at {}:{}", expected_type, actual_type, line, col));
                        }
                    }
                    "read" =>
                    {
                        if type_queue.is_empty() || type_queue.remove(0) != Datatype::Int
                        {
                            return Err(format!("Expected a position for a read application at {}:{}", line, col));
                        }
                        let typ = array_datatype(arrays, name, *line, *col)?;
                        type_queue.push(typ);
                    }
                    "length" =>
                    {
                        type_queue.push(Datatype::Int);
                    }
                    _ => return Err(format!("Encountered unknown application '{}' at {}:{}", word, line, col)),
                }
            }
        }
        if let Some(before) = debug_string
        {
            println!("{} => {:?}", before, type_queue);
        }
    }
    Ok(queue_storage)
}
/// Validates the calls inside every function body first and inside the main
/// `operations` afterwards, stopping at the first error.
///
/// With `debug` set, a confirmation line is printed after each successfully
/// validated function and after the main operations.
fn validate_function_calls(operations: &Vec<Operation>, functions: &Vec<Function>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
{
    functions.iter().try_for_each(|current| -> Result<(), String>
    {
        validate_function_calls_in_block(&current.content, functions, arrays, debug)?;
        if debug
        {
            println!("Successfully validated function calls in function '{}'", current.name);
        }
        Ok(())
    })?;

    let main_result = validate_function_calls_in_block(operations, functions, arrays, debug);
    if main_result.is_ok() && debug
    {
        println!("Successfully validated function calls in main operations");
    }
    main_result
}
/// Checks that every name referenced inside `block` has been declared.
///
/// Function calls must name an entry of `functions` and applications must name an
/// entry of `arrays`. The bodies of `if`, `else` and `while` are checked
/// recursively. All other operations reference no names and are accepted as is.
///
/// # Errors
///
/// Returns a message naming the first unknown function or array together with
/// the position stored in the offending operation.
fn validate_function_calls_in_block(block: &Vec<Operation>, functions: &Vec<Function>, arrays: &Vec<Arr>, debug: bool) -> Result<(), String>
{
    for operation in block
    {
        match operation
        {
            Operation::Depth(_, _) | Operation::QueueDiagnostic(_, _) | Operation::Intrinsic(_, _, _) | Operation::Enqueue(_, _, _, _) | Operation::Dequeue(_, _) |
            Operation::Requeue(_, _) | Operation::Dup(_, _) | Operation::Swap(_, _) | Operation::Interrupt(_, _) => {},
            Operation::FunctionCall(function_name, line, col) =>
            {
                if !functions.iter().any(|x| &x.name == function_name)
                {
                    return Err(format!("Call to unknown function '{}' at {}:{}", function_name, line, col));
                }
            }
            Operation::Apply(name, word, line, col) =>
            {
                // Reject undeclared arrays here so that later passes, which look
                // the array up by name, never encounter a missing entry.
                if !arrays.iter().any(|x| &x.name == name)
                {
                    return Err(format!("Application '{}' on unknown array '{}' at {}:{}", word, name, line, col));
                }
            }
            Operation::If(if_block, maybe_else_block, _, _) =>
            {
                validate_function_calls_in_block(if_block, functions, arrays, debug)?;
                if let Some(else_block) = maybe_else_block
                {
                    validate_function_calls_in_block(else_block, functions, arrays, debug)?;
                }
            }
            Operation::While(while_block, _, _) =>
            {
                validate_function_calls_in_block(while_block, functions, arrays, debug)?;
            }
        }
    }
    Ok(())
}
/// Removes every array declaration from `tokens` and returns the declared arrays.
///
/// A declaration has the form `arr <name> { <type> <size> }`. Every token that
/// is not part of a declaration is kept in `tokens` in its original order. Each
/// array is stored under its sanitized name and pre-filled with `size` default
/// values (`""`, `"false"` or `"0"` depending on the datatype).
///
/// # Errors
///
/// Returns a message when a declaration is malformed or incomplete, when the
/// size is negative, or when the name collides with a function, an already
/// declared array or an intrinsic. Positions in the messages are those of the
/// `arr` keyword.
fn extract_arrays(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, functions: &Vec<Function>, debug: bool) -> Result<Vec<Arr>, String>
{
    let mut arrays: Vec<Arr> = Vec::new();
    let mut new_tokens: Vec<Token> = Vec::new();
    let mut tokens_iter = tokens.iter().peekable();
    while let Some(token) = tokens_iter.next()
    {
        let (line, col) = match token
        {
            Token::Keyword(word, line, col) if word == "arr" => (*line, *col),
            _ =>
            {
                new_tokens.push(token.clone());
                continue;
            }
        };
        if debug
        {
            println!("Found an array at {}:{}", line, col);
        }

        let raw_name = match tokens_iter.next()
        {
            Some(Token::Keyword(name, _, _)) => name,
            _ => return Err(format!("Expected array name, at {}:{}", line, col)),
        };
        // Functions and arrays are stored under their sanitized names, so the
        // collision checks have to compare sanitized names as well.
        let name = sanitize_name(raw_name.clone());
        if functions.iter().any(|x| x.name == name)
        {
            return Err(format!("Cannot redeclare an array with the same name as a function {}:{}", line, col));
        }
        if arrays.iter().any(|x| x.name == name)
        {
            return Err(format!("Cannot redeclare an array with the same name as an array {}:{}", line, col));
        }
        // Intrinsics are keyed by the name as written in the source.
        if intrinsics.contains_key(raw_name.as_str())
        {
            return Err(format!("An array cannot have the same name as an intrinsic ({}) at {}:{}", raw_name, line, col));
        }

        match tokens_iter.next()
        {
            Some(Token::Keyword(open_curly, _, _)) if open_curly == "{" => {}
            Some(_) => return Err(format!("Expected '{{' in array declaration at {}:{}", line, col)),
            None => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        }

        let datatype = match tokens_iter.next()
        {
            Some(Token::Keyword(typ, _, _)) => str_to_datatype(typ, line, col)?,
            Some(other) => return Err(format!("Expected a datatype for the array, got {:?} instead at {}:{}", other, line, col)),
            None => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        };

        let size = match tokens_iter.next()
        {
            Some(Token::IntLit(size_str, _, _)) => size_str
                .parse::<i64>()
                .map_err(|_| format!("Invalid array size {} at {}:{}", size_str, line, col))?,
            // Previously a missing size silently discarded the whole declaration.
            Some(other) => return Err(format!("Expected an array size but got {:?} at {}:{}", other, line, col)),
            None => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        };
        if size < 0
        {
            return Err(format!("Array size must not be negative, got {} at {}:{}", size, line, col));
        }

        match tokens_iter.next()
        {
            Some(Token::Keyword(close_curly, _, _)) if close_curly == "}" => {}
            Some(_) => return Err(format!("Expected '}}' in array declaration at {}:{}", line, col)),
            None => return Err(format!("Reached the end of the file while parsing an array at {}:{}", line, col)),
        }

        let default_val = match datatype
        {
            Datatype::String => String::new(),
            Datatype::Bool => String::from("false"),
            Datatype::Int => String::from("0"),
        };
        let data: Vec<String> = (0..size).map(|_| default_val.clone()).collect();
        arrays.push(Arr { name, datatype, length: size, data });
    }
    *tokens = new_tokens;
    Ok(arrays)
}
/// Returns `name` with every `-` and `+` replaced by `_`; all other characters
/// are kept unchanged.
fn sanitize_name(name: String) -> String
{
    name.chars()
        .map(|ch| match ch
        {
            '-' | '+' => '_',
            other => other,
        })
        .collect()
}
/// Maps the type keywords `bool`, `int` and `str` to their `Datatype`.
///
/// Any other text yields an error that mentions the text and the given
/// `line`:`col` position.
fn str_to_datatype(s: &str, line: i32, col: i32) -> Result<Datatype, String>
{
    // "any" is not accepted while `Datatype::Any` is commented out.
    let datatype = match s
    {
        "str" => Datatype::String,
        "int" => Datatype::Int,
        "bool" => Datatype::Bool,
        _ => return Err(format!("Expected a datatype for the array, got {} instead at {}:{}", s, line, col)),
    };
    Ok(datatype)
}
/// Removes every function declaration from `tokens` and returns the parsed functions.
///
/// A declaration has the form
/// `function <input types> => <output types> <name> { <body> }`, where the types
/// are `str`, `int` or `bool`. The first keyword after `=>` that is not a type
/// is taken as the function name. Every token outside a declaration is kept in
/// `tokens` in its original order. Functions are stored under their sanitized
/// names.
///
/// # Errors
///
/// Returns a message when the token stream ends inside a declaration, when a
/// literal or application appears in the signature, when the name is a reserved
/// word, an intrinsic or an already declared function, or when the body fails
/// to parse.
fn extract_functions(tokens: &mut Vec<Token>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<Vec<Function>, String>
{
    let mut functions: Vec<Function> = Vec::new();
    let mut new_tokens: Vec<Token> = Vec::new();
    let mut tokens_iter = tokens.iter().peekable();
    while let Some(token) = tokens_iter.next()
    {
        match token
        {
            Token::Keyword(word, line, col) if word == "function" =>
            {
                if debug
                {
                    println!("Found a function at {}:{}", line, col);
                }
            }
            _ =>
            {
                new_tokens.push(token.clone());
                continue;
            }
        }

        // Input types: everything up to the "=>" separator.
        let mut ins: Vec<Datatype> = Vec::new();
        loop
        {
            let token = match tokens_iter.next()
            {
                Some(token) => token,
                None => return Err(String::from("Unexpected end of file while extracting a function")),
            };
            match token
            {
                Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) |
                Token::Apply(_, _, line, col) =>
                {
                    return Err(format!("Expected input parameters for a function but got {:?} instead at {}:{}", token, line, col));
                }
                Token::Keyword(word, line, col) =>
                {
                    match word.as_str()
                    {
                        "=>" => break,
                        "str" => ins.push(Datatype::String),
                        "int" => ins.push(Datatype::Int),
                        "bool" => ins.push(Datatype::Bool),
                        _ => return Err(format!("Expected input parameters for a function but got {} instead at {}:{}", word, line, col)),
                    }
                }
            }
        }
        if debug
        {
            println!("ins: {:?}", ins);
        }

        // Output types, terminated by the function name and its body.
        let mut outs: Vec<Datatype> = Vec::new();
        loop
        {
            let token = match tokens_iter.next()
            {
                Some(token) => token,
                None => return Err(String::from("Unexpected end of file while extracting a function")),
            };
            match token
            {
                Token::IntLit(_, line, col) | Token::StringLit(_, line, col) | Token::BoolLit(_, line, col) |
                Token::Apply(_, _, line, col) =>
                {
                    return Err(format!("Expected output parameters or a function name but got {:?} instead at {}:{}", token, line, col));
                }
                Token::Keyword(word, line, col) =>
                {
                    match word.as_str()
                    {
                        "str" => outs.push(Datatype::String),
                        "int" => outs.push(Datatype::Int),
                        "bool" => outs.push(Datatype::Bool),
                        // Words the parser never treats as a function call cannot
                        // be used as function names.
                        "{" | "}" | "deq" | "req" | "dup" | "swp" | "true" | "false" | "depth" | "???" |
                        "if" | "while" | "interrupt" | "function" =>
                        {
                            return Err(format!("Expected function name but got {} at {}:{}", word, line, col));
                        }
                        _ =>
                        {
                            // Functions are stored under their sanitized names, so
                            // the redeclaration check compares sanitized names.
                            let name = sanitize_name(word.clone());
                            if functions.iter().any(|x| x.name == name)
                            {
                                return Err(format!("Redeclaration of function '{}' at {}:{}", word, line, col));
                            }
                            if intrinsics.contains_key(word.as_str())
                            {
                                return Err(format!("Function name {} at {}:{} is already an intrinsic", word, line, col));
                            }
                            if debug
                            {
                                println!("outs: {:?}", outs);
                            }
                            let block = parse_block(&mut tokens_iter, intrinsics, debug)?;
                            functions.push(Function { name, ins, outs, content: block });
                            break;
                        }
                    }
                }
            }
        }
    }
    *tokens = new_tokens;
    Ok(functions)
}
/// Parses a `{ ... }` block: consumes the opening `{` and then every operation
/// up to and including the matching `}`.
///
/// Fails when the next token is not the keyword `{` (including when no token is
/// left) or when the block contents fail to parse.
fn parse_block(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, debug: bool) -> Result<Vec<Operation>, String>
{
    match tokens_iter.next()
    {
        Some(Token::Keyword(opener, line, col)) =>
        {
            if opener != "{"
            {
                return Err(format!("Expected '{{' to open a block but got {} at {}:{}", opener, line, col));
            }
        }
        _ => return Err(String::from("Expected '{' to open a block")),
    }
    parse_until_delimiter(tokens_iter, intrinsics, Some("}"), debug)
}
fn parse_until_delimiter(tokens_iter: &mut Peekable<std::slice::Iter<Token>>, intrinsics: &HashMap<&str, (Vec<Datatype>, Vec<Datatype>)>, delimiter: Option<&str>, debug: bool) -> Result<Vec<Operation>, String>
{
let mut operations: Vec<Operation> = Vec::new();
loop
{
let maybe_token = tokens_iter.next();
match maybe_token
{
Some(token) =>
{
match token
{
Token::IntLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::Int, value.clone(), *line, *col));
}
Token::StringLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::String, value.clone(), *line, *col));
}
Token::BoolLit(value, line, col) =>
{
operations.push(Operation::Enqueue(Datatype::Bool, value.clone(), *line, *col));
}
Token::Apply(name, word, line, col) =>
{
operations.push(Operation::Apply(sanitize_name(name.clone()), word.clone(), *line, *col));
}
Token::Keyword(word, line, col) =>
{
if intrinsics.contains_key(word.as_str())
{
operations.push(Operation::Intrinsic(word.clone(), *line, *col));
}
else if word == "if"
{
let block = parse_block(tokens_iter, intrinsics, debug)?;
let else_block =
if let Some(Token::Keyword(maybe_else, _, _)) = tokens_iter.peek()
{
if maybe_else == "else"
{
tokens_iter.next();
Some(parse_block(tokens_iter, intrinsics, debug)?)
}
else
{
None
}
}
else
{
None
};
operations.push(Operation::If(block, else_block, *line, *col));
}
else if word == "while"
{
operations.push(Operation::While(parse_block(tokens_iter, intrinsics, debug)?, *line, *col));
}
else if word == "deq"
{
operations.push(Operation::Dequeue(*line, *col));
}
else if word == "req"
{
operations.push(Operation::Requeue(*line, *col));
}
else if word == "dup"
{
operations.push(Operation::Dup(*line, *col));
}
else if word == "swp"
{
operations.push(Operation::Swap(*line, *col));
}
else if word == "depth"
{
operations.push(Operation::Depth(*line, *col));
}
else if word == "???"
{
operations.push(Operation::QueueDiagnostic(*line, *col));
}
else if word == "interrupt"
{
operations.push(Operation::Interrupt(*line, *col));
}
else if Some(word.as_str()) == delimiter
{
return Ok(operations);
}
else if word == "{" || word == "function"
{
return Err(format!("Unexpected keyword {} at {}:{}", word, line, col));
}
else
{
operations.push(Operation::FunctionCall(sanitize_name(word.clone()), *line, *col));
}
}
}
}
None =>
{
if delimiter.is_some()
{
return Err(format!("Reached the end of the file while parsing a block"));
}
else
{
return Ok(operations);
}
}
}
}
}
/// Prints the command line synopsis to stdout and terminates the process with
/// exit code 0.
fn usage()
{
    let synopsis = "Usage: kurz -c path/to/file";
    println!("{}", synopsis);
    exit(0);
}
/// Splits `text` into tokens.
///
/// * Text between double quotes becomes a `StringLit`; the two-character
///   sequence `\n` inside it is replaced by a newline.
/// * `//` outside of a string starts a comment that runs to the end of the line.
/// * Any other run of non-whitespace characters is a word. A word containing a
///   `.` becomes an `Apply` of the (sanitized) part before the dot and the part
///   after it; a word that parses as `i64` becomes an `IntLit`; `true` and
///   `false` become `BoolLit`; everything else is a `Keyword`.
///
/// The line and column stored in a token are the position at which the token
/// ended, not where it started.
///
/// # Errors
///
/// Returns a message when a `"` appears in the middle of a word or when the
/// text ends inside a string literal.
fn tokenize(text: &str) -> Result<Vec<Token>, String>
{
    // Emits the pending word (and application name, if any) as a single token
    // and leaves both buffers empty.
    fn flush_word(tokens: &mut Vec<Token>, application_name: &mut String, word: &mut String, line: i32, col: i32)
    {
        let finished = std::mem::take(word);
        let token = if !application_name.is_empty()
        {
            Token::Apply(sanitize_name(std::mem::take(application_name)), finished, line, col)
        }
        else if finished.parse::<i64>().is_ok()
        {
            Token::IntLit(finished, line, col)
        }
        else if finished == "true" || finished == "false"
        {
            Token::BoolLit(finished, line, col)
        }
        else
        {
            Token::Keyword(finished, line, col)
        };
        tokens.push(token);
    }

    let mut tokens: Vec<Token> = Vec::new();
    let mut line = 1;
    let mut col = 1;
    let mut state = TokenizerState::Whitespace;
    let mut word = String::new();
    let mut iter = text.chars().peekable();
    let mut application_name = String::new();
    while let Some(ch) = iter.next()
    {
        // "//" inside a string literal is ordinary text, not a comment.
        let in_string = matches!(state, TokenizerState::Quote);
        if !in_string && ch == '/' && iter.peek() == Some(&'/')
        {
            // A comment directly after a word ends that word; without this the
            // word would stay in the buffer and be glued onto the next token.
            if let TokenizerState::Keyword = state
            {
                flush_word(&mut tokens, &mut application_name, &mut word, line, col);
            }
            state = TokenizerState::Comment;
        }
        match state
        {
            TokenizerState::Comment =>
            {
                if ch == '\n'
                {
                    state = TokenizerState::Whitespace;
                }
            }
            TokenizerState::Whitespace =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Quote;
                }
                else if !ch.is_whitespace()
                {
                    state = TokenizerState::Keyword;
                    word.push(ch);
                }
            }
            TokenizerState::Quote =>
            {
                if ch == '"'
                {
                    state = TokenizerState::Whitespace;
                    tokens.push(Token::StringLit(word.replace("\\n", "\n"), line, col));
                    word.clear();
                }
                else
                {
                    word.push(ch);
                }
            }
            TokenizerState::Keyword =>
            {
                if ch.is_whitespace()
                {
                    state = TokenizerState::Whitespace;
                    flush_word(&mut tokens, &mut application_name, &mut word, line, col);
                }
                else
                {
                    match ch
                    {
                        '"' => return Err(String::from("Having '\"' in the middle of a word is not allowed")),
                        '.' =>
                        {
                            // Everything before the dot names the target of the
                            // application; the word after it is collected next.
                            application_name = std::mem::take(&mut word);
                        }
                        _ =>
                        {
                            word.push(ch);
                        }
                    }
                }
            }
        }
        col += 1;
        if ch == '\n'
        {
            col = 1;
            line += 1;
        }
    }
    match state
    {
        TokenizerState::Quote =>
        {
            return Err(String::from("Encountered EOF before closing string"));
        }
        TokenizerState::Whitespace | TokenizerState::Comment => {},
        TokenizerState::Keyword =>
        {
            // Classify the last word exactly like a whitespace-terminated one,
            // so a file ending in e.g. `5` or `true` still yields a literal.
            flush_word(&mut tokens, &mut application_name, &mut word, line, col);
        }
    }
    Ok(tokens)
}