Implement extraction of functions and their removal from the token stream

2022-12-05 00:38:20 +01:00
parent 9976ef9fe9
commit 6a2bc25eaf
2 changed files with 217 additions and 29 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,28 +1,205 @@
 use core::panic;
 use std::env;
 use std::fs;
 use std::process::exit;
-#[derive(Debug)]
+#[derive(Debug, Clone, PartialEq)]
 /// A token produced by `tokenize`.
 ///
 /// In every variant the two trailing `i32` fields are the line and column
 /// values the tokenizer had when it emitted the token.
 enum Token
 {
 	/// A string literal (presumably the text between double quotes; the code that emits it is not shown here).
 	StringLit(String, i32, i32),
-	Intrinsic(String, i32, i32),
 	/// A whitespace-delimited word that parses as an `i64`.
+	IntLit(i64, i32, i32),
 	/// Any other whitespace-delimited word, e.g. `function`, `=>`, `{`, `}` or a type or function name.
 	Keyword(String, i32, i32),
 }
 /// The state of the character-by-character state machine in `tokenize`.
 enum TokenizerState
 {
 	/// Between tokens; whitespace characters are skipped.
 	Whitespace,
 	/// Inside a string literal (reaching end of input in this state is reported as an unclosed string).
 	Quote,
 	/// Accumulating a word; the word is emitted as a token when the next whitespace character is seen.
 	Keyword,
 	/// Inside a `//` comment; ends at the next newline.
 	Comment,
 }
 /// The datatypes that can appear in a function signature.
 #[derive(Debug,Clone,Copy)]
 enum Datatype
 {
 	/// Written as `int` in source.
 	Int,
 	/// Written as `str` in source.
 	String,
 	/// Written as `ptr` in source.
 	Pointer,
 	/// Written as `any` in source.
 	Any,
 }
 /// A function definition extracted from the token stream by `extract_functions`.
 #[derive(Debug)]
 struct Function
 {
 	/// The function's name (the first non-datatype keyword after `=>`).
 	name: String,
 	/// Datatypes listed between `function` and `=>`, in source order.
 	ins: Vec<Datatype>,
 	/// Datatypes listed between `=>` and the function name, in source order.
 	outs: Vec<Datatype>,
 	/// The tokens between the opening `{` and the closing `}`, braces excluded.
 	content: Vec<Token>
 }
 /// Entry point: parses the command line and runs the requested action.
 ///
 /// With `-c` / `--compile`, reads the source file named by the following
 /// argument, tokenizes it, extracts the function definitions, and prints the
 /// token stream (before and after extraction) and the functions with `{:?}`.
 /// Panics on any other option or when the file cannot be read.
 fn main()
 {
 	let args: Vec<String> = env::args().collect();
 	// `usage()` prints the help text and exits the process, so `args[1]` below always exists.
 	if args.len() < 2
 	{
 		usage()
 	}
 	match args[1].as_str()
 	{
 		"-c" | "--compile" =>
 		{
 			// NOTE(review): the guard above only guarantees `args[1]`; running with `-c` but no
 			// path makes `args[2]` panic with an out-of-bounds index instead of showing the usage.
 			let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
-			let tokens: Vec<Token> = tokenize(&file_content);
+			let mut tokens: Vec<Token> = tokenize(&file_content);
 			println!("{:?}", tokens);
 			// Function definitions are moved out of `tokens`; what remains is the top-level code.
 			let functions: Vec<Function> = extract_functions(&mut tokens);
 			println!("{:?}", tokens);
 			println!("{:?}", functions);
 		}
-		_ => panic!("Unknown option")
+		_ => panic!("Unknown option {}", args[1])
 	}
 }
 /// Extracts every function definition from `tokens` and removes its tokens from the stream.
 ///
 /// A definition has the shape `function <ins...> => <outs...> <name> { <body...> }`, where
 /// `<ins>` and `<outs>` are the datatype keywords `int`, `str`, `ptr` and `any`. The first
 /// keyword after `=>` that is not a datatype is taken as the function name. The body ends
 /// at the first `}` keyword token; nested braces are not tracked.
 ///
 /// On return `tokens` holds only the tokens that were outside of any definition, in their
 /// original order, and the returned vector holds the functions in source order.
 ///
 /// # Panics
 ///
 /// Panics when a signature contains an unknown datatype or a literal, when a function name
 /// is defined twice, when the name is not followed by `{`, or when the input ends in the
 /// middle of a definition.
 fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
 {
 	let mut functions: Vec<Function> = Vec::new();
 	let mut state = FunctionExtractionState::Outside;
 	let mut ins: Vec<Datatype> = Vec::new();
 	let mut outs: Vec<Datatype> = Vec::new();
 	let mut function_name = String::new();
 	let mut content: Vec<Token> = Vec::new();
 	// Tokens that stay in the stream. Taking ownership of the input lets every token be moved
 	// to its destination once, instead of cloning it and removing it by index afterwards.
 	let mut remaining: Vec<Token> = Vec::with_capacity(tokens.len());
 	for token in std::mem::take(tokens)
 	{
 		match state
 		{
 			FunctionExtractionState::Outside =>
 			{
 				if matches!(&token, Token::Keyword(name, _, _) if name == "function")
 				{
 					// The `function` keyword itself belongs to the definition and is dropped.
 					state = FunctionExtractionState::Ins;
 				}
 				else
 				{
 					remaining.push(token);
 				}
 			}
 			FunctionExtractionState::Ins =>
 			{
 				match &token
 				{
 					Token::Keyword(name, line, col) =>
 					{
 						if name == "=>"
 						{
 							state = FunctionExtractionState::Outs;
 						}
 						else if let Some(datatype) = parse_datatype(name)
 						{
 							ins.push(datatype);
 						}
 						else
 						{
 							panic!("Unknown datatype '{}' at {}:{}", name, line, col)
 						}
 					}
 					Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
 				}
 			}
 			FunctionExtractionState::Outs =>
 			{
 				match token
 				{
 					Token::Keyword(name, _, _) =>
 					{
 						if let Some(datatype) = parse_datatype(&name)
 						{
 							outs.push(datatype);
 						}
 						else
 						{
 							// The first keyword that is not a datatype names the function.
 							if functions.iter().any(|x| x.name == name)
 							{
 								panic!("A function with name {} already exists", name);
 							}
 							function_name = name;
 							state = FunctionExtractionState::OpenCurly;
 						}
 					}
 					Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
 				}
 			}
 			FunctionExtractionState::OpenCurly =>
 			{
 				match &token
 				{
 					Token::Keyword(name, _, _) if name == "{" => state = FunctionExtractionState::Body,
 					Token::Keyword(_, line, col) | Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected '{{' to open the function's body at {}:{}", line, col),
 				}
 			}
 			FunctionExtractionState::Body =>
 			{
 				if matches!(&token, Token::Keyword(name, _, _) if name == "}")
 				{
 					state = FunctionExtractionState::Outside;
 					// `take` moves the collected parts out and leaves empty buffers for the next function.
 					functions.push(Function
 					{
 						name: std::mem::take(&mut function_name),
 						ins: std::mem::take(&mut ins),
 						outs: std::mem::take(&mut outs),
 						content: std::mem::take(&mut content),
 					});
 				}
 				else
 				{
 					content.push(token);
 				}
 			}
 		}
 	}
 	// A definition that is still open here would otherwise vanish without any diagnostic.
 	if !matches!(state, FunctionExtractionState::Outside)
 	{
 		panic!("Encountered EOF before the end of a function definition");
 	}
 	*tokens = remaining;
 	functions
 }
 /// Maps a datatype keyword (`int`, `str`, `ptr`, `any`) to its `Datatype`; `None` for any other word.
 fn parse_datatype(name: &str) -> Option<Datatype>
 {
 	match name
 	{
 		"int" => Some(Datatype::Int),
 		"str" => Some(Datatype::String),
 		"ptr" => Some(Datatype::Pointer),
 		"any" => Some(Datatype::Any),
 		_ => None,
 	}
 }
 /// The position of `extract_functions` within a function definition while it walks the tokens.
 #[derive(Debug, PartialEq)]
 enum FunctionExtractionState
 {
 	/// Not inside a definition; waiting for the `function` keyword.
 	Outside,
 	/// After `function`: reading input datatypes until `=>`.
 	Ins,
 	/// After `=>`: reading output datatypes; the first non-datatype keyword is the function name.
 	Outs,
 	/// After the function name: the next token must be `{`.
 	OpenCurly,
 	/// Inside the braces: collecting body tokens until `}`.
 	Body,
 }
 /// Prints the command-line usage to stderr and terminates the process.
 ///
 /// The visible caller invokes this when too few arguments were given, so the
 /// process exits with a non-zero status to let shells and scripts detect the
 /// failed invocation.
 fn usage()
 {
 	eprintln!("Usage: kurz -c path/to/file");
 	exit(1);
 }
 fn tokenize(text: &str) -> Vec<Token>
 {
 	let mut tokens: Vec<Token> = Vec::new();
@@ -30,10 +207,22 @@ fn tokenize(text: &str) -> Vec<Token>
 	let mut col = 1;
 	let mut state = TokenizerState::Whitespace;
 	let mut word = String::new();
-	for ch in text.chars()
+	let mut iter = text.chars().peekable();
 	while let Some(ch) = iter.next()
 	{
 		if ch == '/' && iter.peek() == Some(&'/')
 		{
 			state = TokenizerState::Comment;
 		}
 		match state
 		{
 			TokenizerState::Comment =>
 			{
 				if ch == '\n'
 				{
 					state = TokenizerState::Whitespace;
 				}
 			}
 			TokenizerState::Whitespace =>
 			{
 				// If ch is whitespace, do nothing
@@ -47,7 +236,7 @@ fn tokenize(text: &str) -> Vec<Token>
 						}
 						_ =>
 						{
-							state = TokenizerState::Rest;
+							state = TokenizerState::Keyword;
 							word.push(ch);
 						}
 					}
@@ -66,17 +255,20 @@ fn tokenize(text: &str) -> Vec<Token>
 					word.push(ch);
 				}
 			}
-			TokenizerState::Rest =>
+			TokenizerState::Keyword =>
 			{
 				if ch.is_whitespace()
 				{
 					state = TokenizerState::Whitespace;
-					let token: Token = match word.as_str()
+					if let Ok(number) = word.parse::<i64>()
 					{
-						"print" => Token::Intrinsic(word.clone(), line, col),
+						tokens.push(Token::IntLit(number, line, col));
-						_ => todo!("Unknown word {}", word)
+					}
-					};
+					else
-					tokens.push(token);
+					{
 						tokens.push(Token::Keyword(word.clone(), line, col));
 					}
 					word.clear();
 				}
 				else
 				{
@@ -104,24 +296,11 @@ fn tokenize(text: &str) -> Vec<Token>
 		{
 			panic!("Encountered EOF before closing string");
 		}
-		TokenizerState::Whitespace => {},
+		TokenizerState::Whitespace | TokenizerState::Comment => {},
-		TokenizerState::Rest =>
+		TokenizerState::Keyword =>
 		{
-			//TODO: extract this as it is duplicate work with Rest handling in the loop
+			tokens.push(Token::Keyword(word.clone(), line, col));
 			let token: Token = match word.as_str()
 			{
 				"print" => Token::Intrinsic(word.clone(), line, col),
 				_ => todo!("Unknown word {}", word)
 			};
 			tokens.push(token);
 		}
 	}
 	tokens
 }
 // NOTE(review): this looks like the pre-commit declaration of `TokenizerState` (it still has
 // `Rest`, which the hunks above rename to `Keyword`, and lacks `Comment`); its removal
 // markers appear to have been lost — confirm it is not part of the resulting file.
 enum TokenizerState
 {
 	Whitespace,
 	Quote,
 	Rest,
 }
--- a/test.qbl
+++ b/test.qbl
@@ -1 +1,10 @@
-"Hello, World!\n" print
+"Hello, World!\n" print 43 foo foo deq
 // Dequeues, enqueues 42 and 17, prints the head
 function any => int foo
 {
 	deq 42 17 print
 }
 "test2" print