Implement function extraction and the removal of extracted functions from the token stream

2022-12-05 00:38:20 +01:00
parent 9976ef9fe9
commit 6a2bc25eaf
2 changed files with 217 additions and 29 deletions
--- a/src/main.rs
+++ b/src/main.rs
@ -1,28 +1,205 @@
+use core::panic;
 use std::env;
 use std::fs;
+use std::process::exit;

-#[derive(Debug)]
+/// A lexical token produced by `tokenize`.
+///
+/// The two trailing `i32` fields of every variant are filled from the tokenizer's
+/// `line` and `col` counters at the moment the token is pushed.
+#[derive(Debug, Clone, PartialEq)]
 enum Token
 {
+	/// A string literal (presumably the text between quotes; the code that builds it is
+	/// not visible in this diff).
 	StringLit(String, i32, i32),
-	Intrinsic(String, i32, i32),
+	/// A whitespace-terminated word that parsed successfully as an `i64`.
+	IntLit(i64, i32, i32),
+	/// A word that did not parse as an integer. A word that is still pending at the end
+	/// of the input is always stored as a keyword, without the integer check.
+	Keyword(String, i32, i32),
+}
+/// The state of the character loop in `tokenize`.
+enum TokenizerState
+{
+	/// Between tokens; a non-whitespace character starts a new token.
+	Whitespace,
+	/// Inside a string literal; reaching the end of the input in this state is an error.
+	Quote,
+	/// Accumulating a word until the next whitespace character.
+	Keyword,
+	/// Inside a `//` comment, which lasts until the next newline.
+	Comment,
+}
+
+/// The datatypes that can appear in a function signature.
+#[derive(Debug,Clone,Copy)]
+enum Datatype
+{
+	/// Spelled `int` in the source.
+	Int,
+	/// Spelled `str` in the source.
+	String,
+	/// Spelled `ptr` in the source.
+	Pointer,
+	/// Spelled `any` in the source.
+	Any,
+}
+
+/// A function declaration extracted from the token stream by `extract_functions`.
+#[derive(Debug)]
+struct Function
+{
+	/// The keyword that follows the output datatypes in the declaration.
+	name: String,
+	/// The datatypes listed between `function` and `=>`.
+	ins: Vec<Datatype>,
+	/// The datatypes listed between `=>` and the function name.
+	outs: Vec<Datatype>,
+	/// The tokens between the opening `{` and the closing `}`, braces excluded.
+	content: Vec<Token>
 }

 fn main()
 {
 	let args: Vec<String> = env::args().collect();
+	if args.len() < 2
+	{
+		usage()
+	}
 	match args[1].as_str()
 	{
 		"-c" | "--compile" =>
 		{
 			let file_content = fs::read_to_string(&args[2]).expect("Could not read the source file");
-			let tokens: Vec<Token> = tokenize(&file_content);
+			let mut tokens: Vec<Token> = tokenize(&file_content);
 			println!("{:?}", tokens);
+			let functions: Vec<Function> = extract_functions(&mut tokens);
+			println!("{:?}", tokens);
+			println!("{:?}", functions);
 		}
-		_ => panic!("Unknown option")
+		_ => panic!("Unknown option {}", args[1])
 	}
 }

+/// Removes every function declaration from `tokens` and returns the declared functions.
+///
+/// A declaration has the shape `function <ins...> => <outs...> <name> { <body...> }`,
+/// where `<ins>` and `<outs>` are datatype keywords (`int`, `str`, `ptr`, `any`). The
+/// first keyword after `=>` that is not a datatype is taken as the function name. The
+/// body ends at the first `}` keyword, so bodies cannot contain a `}` token themselves.
+///
+/// On return, `tokens` holds only the tokens outside of any declaration, in their
+/// original order. Each `Function` carries its body tokens without the braces.
+///
+/// # Panics
+/// Panics when
+/// - something other than a datatype or `=>` follows `function`,
+/// - a literal appears where an output datatype or the function name is expected,
+/// - a function name is declared twice,
+/// - the token after the function name is not `{`,
+/// - the input ends in the middle of a declaration.
+fn extract_functions(tokens: &mut Vec<Token>) -> Vec<Function>
+{
+	let mut functions: Vec<Function> = Vec::new();
+	let mut state = FunctionExtractionState::Outside;
+	let mut ins: Vec<Datatype> = Vec::new();
+	let mut outs: Vec<Datatype> = Vec::new();
+	let mut function_name = String::new();
+	let mut content: Vec<Token> = Vec::new();
+	// Location of the most recent `function` keyword, for the end-of-input error.
+	let mut declaration_start: (i32, i32) = (0, 0);
+	// Tokens that belong to no declaration. Rebuilding the vector in one pass
+	// avoids the quadratic cost of removing tokens one index at a time.
+	let mut remaining: Vec<Token> = Vec::with_capacity(tokens.len());
+	for token in tokens.drain(..)
+	{
+		match state
+		{
+			FunctionExtractionState::Outside =>
+			{
+				if let Token::Keyword(name, line, col) = &token
+				{
+					if name == "function"
+					{
+						declaration_start = (*line, *col);
+						state = FunctionExtractionState::Ins;
+						continue;
+					}
+				}
+				remaining.push(token);
+			}
+			FunctionExtractionState::Ins =>
+			{
+				match &token
+				{
+					Token::Keyword(name, line, col) =>
+					{
+						if name == "=>"
+						{
+							state = FunctionExtractionState::Outs;
+						}
+						else
+						{
+							match parse_datatype(name)
+							{
+								Some(datatype) => ins.push(datatype),
+								None => panic!("Unknown datatype '{}' at {}:{}", name, line, col),
+							}
+						}
+					},
+					Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
+				}
+			}
+			FunctionExtractionState::Outs =>
+			{
+				match token
+				{
+					Token::Keyword(name, _, _) =>
+					{
+						match parse_datatype(&name)
+						{
+							Some(datatype) => outs.push(datatype),
+							// Any keyword that is not a datatype ends the output list
+							// and names the function.
+							None =>
+							{
+								if functions.iter().any(|x| x.name == name)
+								{
+									panic!("A function with name {} already exists", name);
+								}
+								function_name = name;
+								state = FunctionExtractionState::OpenCurly;
+							}
+						}
+					},
+					Token::StringLit(_, line, col) | Token::IntLit(_, line, col) => panic!("Expected datatype for function declaration at {}:{}", line, col),
+				}
+			}
+			FunctionExtractionState::OpenCurly =>
+			{
+				match &token
+				{
+					Token::Keyword(name, _, _) if name == "{" => state = FunctionExtractionState::Body,
+					Token::Keyword(_, line, col) | Token::StringLit(_, line, col) | Token::IntLit(_, line, col) =>
+					{
+						panic!("Expected '{{' to open the function's body at {}:{}", line, col)
+					}
+				}
+			}
+			FunctionExtractionState::Body =>
+			{
+				if matches!(&token, Token::Keyword(name, _, _) if name == "}")
+				{
+					// `take` moves the collected parts into the function and leaves
+					// empty buffers behind for the next declaration.
+					functions.push(Function
+					{
+						name: std::mem::take(&mut function_name),
+						ins: std::mem::take(&mut ins),
+						outs: std::mem::take(&mut outs),
+						content: std::mem::take(&mut content),
+					});
+					state = FunctionExtractionState::Outside;
+				}
+				else
+				{
+					content.push(token);
+				}
+			}
+		}
+	}
+	if state != FunctionExtractionState::Outside
+	{
+		// Without this check an unterminated declaration would vanish silently,
+		// together with every token that follows it.
+		panic!("Encountered EOF before the end of the function declaration started at {}:{}", declaration_start.0, declaration_start.1);
+	}
+	*tokens = remaining;
+	functions
+}
+
+/// Maps the source spelling of a datatype to its `Datatype`, or `None` for any other word.
+fn parse_datatype(name: &str) -> Option<Datatype>
+{
+	match name
+	{
+		"int" => Some(Datatype::Int),
+		"str" => Some(Datatype::String),
+		"ptr" => Some(Datatype::Pointer),
+		"any" => Some(Datatype::Any),
+		_ => None,
+	}
+}
+
+/// The position of `extract_functions` relative to a function declaration.
+#[derive(Debug, PartialEq)]
+enum FunctionExtractionState
+{
+	/// Not inside a declaration; waiting for the `function` keyword.
+	Outside,
+	/// After `function`: reading input datatypes until `=>`.
+	Ins,
+	/// After `=>`: reading output datatypes; the first other keyword is the function name.
+	Outs,
+	/// After the function name: the next token must be `{`.
+	OpenCurly,
+	/// Inside the body: collecting tokens until `}`.
+	Body,
+}
+
+/// Prints the command-line usage to stderr and terminates the process.
+///
+/// The exit status is non-zero because the visible call site only reaches this
+/// function when the program was invoked incorrectly.
+fn usage()
+{
+	eprintln!("Usage: kurz -c path/to/file");
+	exit(1);
+}
+
 fn tokenize(text: &str) -> Vec<Token>
 {
 	let mut tokens: Vec<Token> = Vec::new();
@ -30,10 +207,22 @@ fn tokenize(text: &str) -> Vec<Token>
 	let mut col = 1;
 	let mut state = TokenizerState::Whitespace;
 	let mut word = String::new();
-	for ch in text.chars()
+	let mut iter = text.chars().peekable();
+	while let Some(ch) = iter.next()
 	{
+		if ch == '/' && iter.peek() == Some(&'/')
+		{
+			state = TokenizerState::Comment;
+		}
 		match state
 		{
+			TokenizerState::Comment =>
+			{
+				if ch == '\n'
+				{
+					state = TokenizerState::Whitespace;
+				}
+			}
 			TokenizerState::Whitespace =>
 			{
 				// If ch is whitespace, do nothing
@ -47,7 +236,7 @@ fn tokenize(text: &str) -> Vec<Token>
 						}
 						_ =>
 						{
-							state = TokenizerState::Rest;
+							state = TokenizerState::Keyword;
 							word.push(ch);
 						}
 					}
@ -66,17 +255,20 @@ fn tokenize(text: &str) -> Vec<Token>
 					word.push(ch);
 				}
 			}
-			TokenizerState::Rest =>
+			TokenizerState::Keyword =>
 			{
 				if ch.is_whitespace()
 				{
 					state = TokenizerState::Whitespace;
-					let token: Token = match word.as_str()
+					if let Ok(number) = word.parse::<i64>()
 					{
-						"print" => Token::Intrinsic(word.clone(), line, col),
-						_ => todo!("Unknown word {}", word)
-					};
-					tokens.push(token);
+						tokens.push(Token::IntLit(number, line, col));
+					}
+					else
+					{
+						tokens.push(Token::Keyword(word.clone(), line, col));
+					}
+					word.clear();
 				}
 				else
 				{
@ -104,24 +296,11 @@ fn tokenize(text: &str) -> Vec<Token>
 		{
 			panic!("Encountered EOF before closing string");
 		}
-		TokenizerState::Whitespace => {},
-		TokenizerState::Rest =>
+		TokenizerState::Whitespace | TokenizerState::Comment => {},
+		TokenizerState::Keyword =>
 		{
-			//TODO: extract this as it is duplicate work with Rest handling in the loop
-			let token: Token = match word.as_str()
-			{
-				"print" => Token::Intrinsic(word.clone(), line, col),
-				_ => todo!("Unknown word {}", word)
-			};
-			tokens.push(token);
+			tokens.push(Token::Keyword(word.clone(), line, col));
 		}
 	}
 	tokens
 }
-
-enum TokenizerState
-{
-	Whitespace,
-	Quote,
-	Rest,
-}
--- a/test.qbl
+++ b/test.qbl
@ -1 +1,10 @@
-"Hello, World!\n" print
+"Hello, World!\n" print 43 foo foo deq
+
+
+// Dequeues, enqueues 42 and 17, prints the head
+function any => int foo
+{
+	deq 42 17 print
+}
+
+"test2" print