commit 721383a043d82d97ed57bf37e7d4b6498c681443 Author: 0x4261756D <38735823+0x4261756D@users.noreply.github.com> Date: Fri Sep 15 11:07:50 2023 +0200 Add tokenizer diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee7098f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +zig-out/ +zig-cache/ diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..93bf74e --- /dev/null +++ b/build.zig @@ -0,0 +1,77 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allow the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting the supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "luaaaaah_zig", + // In this case the main source file is merely a path; however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + exe.addModule("types", b.addModule("types", .{ + .source_file = .{ .path = "src/types.zig" } + })); + exe.addModule("tokenizer", b.addModule("tokenizer", .{ + .source_file = .{ .path = "src/tokenizer.zig" } + })); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary; however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it.
+ const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + const run_unit_tests = b.addRunArtifact(unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); +} diff --git a/src/main.zig b/src/main.zig new file mode 100644 index 0000000..1bd6502 --- /dev/null +++ b/src/main.zig @@ -0,0 +1,44 @@ +const std = @import("std"); +const tokenize = @import("tokenizer").tokenize; + +pub fn main() !void +{ + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); + const file = try std.fs.cwd().openFile(args[1], .{}); + defer file.close(); + const content = try file.readToEndAlloc(allocator, 13000); + defer allocator.free(content); + const tokens = try tokenize(content, allocator); + //std.debug.print("tokens: {any}", .{tokens}); + for(tokens) |token| + { + switch(token.tokenData) + { + .string => |*data| std.debug.print("string: {s} {*}\n", .{data.*, data.ptr}), + .numeral => |*data| std.debug.print("numeral: {any} {*}\n", .{data.*, data}), + .none => |*data| std.debug.print("none {*}\n", .{data}) + } + } + defer + { + var i: usize = 0; + while(i < tokens.len) + { + switch(tokens[i].tokenData) + { + .string => |*data| + { + allocator.free(data.*); + }, + else => {} + } + i += 1; + } + allocator.free(tokens); + } +} diff --git a/src/tokenizer.zig b/src/tokenizer.zig new file mode 100644 index 0000000..2f76bee --- /dev/null +++ b/src/tokenizer.zig @@ -0,0 +1,1199 @@ +const types = @import("types.zig"); +const std = @import("std"); + +const TokenType = enum +{ + Name, + And, Break, Do, Else, Elseif, End, + False, For, Function, Goto, If, In, + Local, Nil, Not, Or, Repeat, Return, + Then, True, Until, While, + Plus, Minus, Star, Slash, Percent, Caret, Hash, + Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash, + EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals, + RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon, + Semicolon, Colon, Comma, Dot, DotDot, DotDotDot, + Numeral, + StringLiteral, + +}; + +const TokenData = union(enum) +{ + string: []u8, + numeral: types.Numeral, + none, +}; + +const Token = struct +{ + tokenType: TokenType, + tokenData: TokenData, +}; + +const TokenizerState = enum +{ + Start, + Quote, SingleQuote, Name, Number, Zero, + A, B, D, E, F, G, I, L, N, O, R, T, U, W, + Plus, Minus, Star, Slash, Percent, Caret, Hash, + Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, + Colon, Semicolon, Comma, Dot, + + An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh, + LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot, + SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber, + + And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi, + DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ, + BigCommentLongBracketStart, SmallComment, + + Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber, + BigComment, BigCommentLongBracketEnd, + + Break, Elsei, False, 
Funct, Local, Repea, Retur, Until, While, + + Elseif, Functi, Repeat, Return, + + Functio, + + Function, +}; + +fn tokenizeUpdateIndexAndState(lastIndex: *?usize, index: ?usize, state: *TokenizerState, newState: TokenizerState) void +{ + lastIndex.* = index; + state.* = newState; +} +fn tokenizeTerminalBase(lastIndex: *?usize, index: ?usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState) void +{ + tokenizeUpdateIndexAndState(lastIndex, index, state, newState); + tokenType.* = newTokenType; +} +fn tokenizeTerminalStr(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void +{ + tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState); + try tokenStr.append(ch); +} +fn tokenizeTerminalIntNum(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: TokenType, newState: TokenizerState, tokenNumeral: *?types.Numeral, ch: u8) !void +{ + tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState); + if(!std.ascii.isDigit(ch)) + { + return error.NoDigit; + } + const digitValue = @as(i64, ch - '0'); + // Initialize the numeral on the first digit, accumulate on every later one. + if(tokenNumeral.* == null) + { + tokenNumeral.* = types.Numeral { .Integer = digitValue }; + } + else + { + switch(tokenNumeral.*.?) + { + .Integer => |*n| n.* = n.* * 10 + digitValue, + .Float => return error.ExpectedIntGotFloat + } + } +} +fn tokenizeTerminalNoToken(lastIndex: *?usize, index: usize, state: *TokenizerState, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void +{ + tokenizeUpdateIndexAndState(lastIndex, index, state, newState); + try tokenStr.*.append(ch); +} +fn tokenizeBacktrack(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, allocator: std.mem.Allocator) !void +{ + // Unwrapping a null tokenType would panic; surface it as a lex error instead. + if(tokenType.* == null) + { + return error.LexError; + } + try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, tokenType.*.?, allocator); +} +fn tokenizeBacktrackCustomToken(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, newTokenType: TokenType, allocator: std.mem.Allocator) !void +{ + if(lastIndex.* == null or tokenType.* == null) + { + return error.LexError; + } + if(newTokenType == TokenType.StringLiteral or newTokenType == TokenType.Name) + { + const content = try allocator.alloc(u8, tokenStr.*.items.len); + @memcpy(content, tokenStr.*.items); + try tokens.append(Token { .tokenType = newTokenType, .tokenData = TokenData { .string = content } }); + } + else + { + try tokens.append(Token { .tokenType = newTokenType, .tokenData = if(tokenType.*.? == TokenType.Numeral) TokenData { .numeral = tokenNumeral.*.?
} + else TokenData.none + }); + } + tokenNumeral.* = null; + index.* = lastIndex.*.?; + tokenStr.*.clearAndFree(); + tokenizeTerminalBase(lastIndex, null, tokenType, state, null, TokenizerState.Start); +} +fn tokenizeAlphanumericNonstart(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, ch: u8, newTokenType: TokenType, allocator: std.mem.Allocator) !void +{ + if(std.ascii.isAlphanumeric(ch) or ch == '_') + { + try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch); + } + else + { + try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, newTokenType, allocator); + } +} +fn tokenizeChar(state: *TokenizerState, ch: u8, lastIndex: *?usize, index: *usize, tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, tokens: *std.ArrayList(Token), longBracketLevel: *u32, allocator: std.mem.Allocator) !void +{ + switch(state.*) + { + TokenizerState.Start => + { + switch(ch) + { + '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Minus, TokenizerState.Minus), + ',' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Comma, TokenizerState.Comma), + '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Equals, TokenizerState.Equals), + '(' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundOpen, TokenizerState.RoundOpen), + ')' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundClosed, TokenizerState.RoundClosed), + '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Dot, TokenizerState.Dot), + ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Colon, TokenizerState.Colon), + '{' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyOpen, TokenizerState.CurlyOpen), + '}' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyClosed, TokenizerState.CurlyClosed), + '[' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareOpen, TokenizerState.SquareOpen), + ']' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareClosed, TokenizerState.SquareClosed), + '+' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Plus, TokenizerState.Plus), + '~' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Tilde, TokenizerState.Tilde), + '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Gt, TokenizerState.Gt), + '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Lt, TokenizerState.Lt), + '#' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Hash, TokenizerState.Hash), + '|' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Pipe, TokenizerState.Pipe), + '&' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Ampersand, TokenizerState.Ampersand), + '%' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Percent, TokenizerState.Percent), + '*' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Star, TokenizerState.Star), + '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Slash, TokenizerState.Slash), + ';' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Semicolon, 
TokenizerState.Semicolon), + '^' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Caret, TokenizerState.Caret), + 'a' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.A, tokenStr, ch), + 'b' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.B, tokenStr, ch), + 'd' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.D, tokenStr, ch), + 'e' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.E, tokenStr, ch), + 'f' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.F, tokenStr, ch), + 'i' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.I, tokenStr, ch), + 'g' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.G, tokenStr, ch), + 'l' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.L, tokenStr, ch), + 'n' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.N, tokenStr, ch), + 'o' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.O, tokenStr, ch), + 'r' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.R, tokenStr, ch), + 't' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.T, tokenStr, ch), + 'u' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.U, tokenStr, ch), + 'w' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.W, tokenStr, ch), + '0' => try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch), + '"' => + { + tokenType.* = null; + state.* = TokenizerState.Quote; + }, + '\'' => + { + tokenType.* = null; + state.* = TokenizerState.SingleQuote; + }, + else => + { + if(std.ascii.isWhitespace(ch)) + { + // Skip whitespace between tokens. + } + else if(std.ascii.isAlphabetic(ch) or ch == '_') + { + try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch); + } + else if(std.ascii.isDigit(ch)) + { + // A leading digit starts a numeral, not a name. + try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Number, tokenNumeral, ch); + } + else + { + std.debug.print("{}: {c}\n", .{state.*, ch}); + return error.NotImplemented; + } + } + } + }, + TokenizerState.Quote => + { + switch(ch) + { + '\\' => state.* = TokenizerState.QuoteBackslash, + '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String), + else => try tokenStr.*.append(ch), + } + }, + TokenizerState.QuoteBackslash => + { + switch(ch) + { + 'a' => + { + try tokenStr.append('\u{0007}'); + state.* = TokenizerState.Quote; + }, + 'b' => + { + try tokenStr.append('\u{0008}'); + state.* = TokenizerState.Quote; + }, + 't' => + { + try tokenStr.append('\t'); + state.* = TokenizerState.Quote; + }, + 'n', '\n' => + { + try tokenStr.append('\n'); + state.* = TokenizerState.Quote; + }, + 'v' => + { + try tokenStr.append('\u{000b}'); + state.* = TokenizerState.Quote; + }, + 'f' => + { + try tokenStr.append('\u{000c}'); + state.* = TokenizerState.Quote; + }, + 'r' => + { + try tokenStr.append('\r'); + state.* = TokenizerState.Quote; + }, + '\\' => + { + try
tokenStr.append('\\'); + state.* = TokenizerState.Quote; + }, + '"' => + { + try tokenStr.append('\"'); + state.* = TokenizerState.Quote; + }, + '\'' => + { + try tokenStr.append('\''); + state.* = TokenizerState.Quote; + }, + 'z' => + { + state.* = TokenizerState.QuoteBackslashZ; + }, + else => return error.UnknownEscapeSequence, + } + }, + TokenizerState.QuoteBackslashZ => + { + switch(ch) + { + '\\' => state.* = TokenizerState.QuoteBackslash, + '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String), + else => + { + if(!std.ascii.isWhitespace(ch)) + { + try tokenStr.append(ch); + state.* = TokenizerState.Quote; + } + else + { + // Noop, https://www.lua.org/manual/5.4/manual.html#3.1: + // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;" + } + } + } + }, + TokenizerState.SingleQuote => + { + switch(ch) + { + '\\' => state.* = TokenizerState.SingleQuoteBackslash, + '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String), + else => try tokenStr.append(ch), + } + }, + TokenizerState.SingleQuoteBackslash => + { + switch(ch) + { + 'a' => + { + try tokenStr.append('\u{0007}'); + state.* = TokenizerState.SingleQuote; + }, + 'b' => + { + try tokenStr.append('\u{0008}'); + state.* = TokenizerState.SingleQuote; + }, + 't' => + { + try tokenStr.append('\t'); + state.* = TokenizerState.SingleQuote; + }, + 'n', '\n' => + { + try tokenStr.append('\n'); + state.* = TokenizerState.SingleQuote; + }, + 'v' => + { + try tokenStr.append('\u{000b}'); + state.* = TokenizerState.SingleQuote; + }, + 'f' => + { + try tokenStr.append('\u{000c}'); + state.* = TokenizerState.SingleQuote; + }, + 'r' => + { + try tokenStr.append('\r'); + state.* = TokenizerState.SingleQuote; + }, + '\\' => + { + try tokenStr.append('\\'); + state.* = TokenizerState.SingleQuote; + }, + '"' => + { + try tokenStr.append('\"'); + state.* = TokenizerState.SingleQuote; + }, + '\'' => + { + try tokenStr.append('\''); + state.* = TokenizerState.SingleQuote; + }, + 'z' => + { + state.* = TokenizerState.SingleQuoteBackslashZ; + }, + else => return error.UnknownEscapeSequence, + } + }, + TokenizerState.SingleQuoteBackslashZ => + { + switch(ch) + { + '\\' => state.* = TokenizerState.SingleQuoteBackslash, + '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String), + else => + { + if(!std.ascii.isWhitespace(ch)) + { + try tokenStr.append(ch); + state.* = TokenizerState.SingleQuote; + } + else + { + // Noop, https://www.lua.org/manual/5.4/manual.html#3.1: + // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;" + } + } + } + }, + TokenizerState.String => try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, TokenType.StringLiteral, allocator), + TokenizerState.Name => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + TokenizerState.Zero => + { + switch(ch) + { + 'x' => + { + try tokenStr.*.append(ch); + tokenType.* = null; + state.* = TokenizerState.HexNumberX; + }, + '.'
=> return error.NotImplemented, + else => + { + if(std.ascii.isDigit(ch)) + { + const digitValue = @as(i64, ch - '0'); + lastIndex.* = index.*; + if(tokenNumeral.* == null) + { + tokenNumeral.* = types.Numeral { .Integer = digitValue }; + tokenType.* = TokenType.Numeral; + } + else + { + tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue; + } + } + else + { + try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator); + } + } + } + }, + TokenizerState.HexNumberX => + { + if(std.ascii.isHex(ch)) + { + lastIndex.* = index.*; + tokenType.* = TokenType.Numeral; + // First hex digit: initialize the numeral, then continue in HexNumber. + state.* = TokenizerState.HexNumber; + if(std.ascii.isDigit(ch)) + { + tokenNumeral.* = types.Numeral { .Integer = @as(i64, ch - '0') }; + } + else + { + // Hex letters carry the values 10..15. + tokenNumeral.* = types.Numeral { .Integer = @as(i64, std.ascii.toLower(ch) - 'a' + 10) }; + } + } + else + { + try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator); + } + }, + TokenizerState.HexNumber => + { + switch(ch) + { + 'p' => + { + try tokenStr.*.append(ch); + tokenType.* = null; + state.* = TokenizerState.HexExpNumber; + }, + else => + { + if(std.ascii.isHex(ch)) + { + const digitValue = @as(i64, if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10); + lastIndex.* = index.*; + tokenType.* = TokenType.Numeral; + // Accumulate instead of overwriting the digits seen so far. + tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 16 + digitValue; + } + else + { + try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator); + } + } + } + }, + TokenizerState.Number => + { + switch(ch) + { + 'e' => + { + try tokenStr.*.append(ch); + tokenType.* = null; + state.* = TokenizerState.ExpNumber; + }, + '.' => return error.NotImplemented, + else => + { + if(std.ascii.isDigit(ch)) + { + const digitValue = @as(i64, ch - '0'); + lastIndex.* = index.*; + if(tokenNumeral.* == null) + { + tokenNumeral.* = types.Numeral { .Integer = digitValue }; + tokenType.* = TokenType.Numeral; + } + else + { + tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue; + } + } + else + { + try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator); + } + + } + } + }, + TokenizerState.Comma, TokenizerState.RoundOpen, TokenizerState.RoundClosed, + TokenizerState.CurlyOpen, TokenizerState.CurlyClosed, TokenizerState.Plus, + TokenizerState.TildeEquals, TokenizerState.EqualsEquals, TokenizerState.Hash, + TokenizerState.GtEquals, TokenizerState.LtEquals, TokenizerState.SquareOpen, + TokenizerState.SquareClosed, TokenizerState.Pipe, TokenizerState.Ampersand, + TokenizerState.Percent, TokenizerState.Star, TokenizerState.Semicolon, + TokenizerState.Caret, TokenizerState.DotDotDot, TokenizerState.GtGt, + TokenizerState.LtLt, TokenizerState.SlashSlash => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + TokenizerState.Tilde => + { + switch(ch) + { + '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.TildeEquals, TokenizerState.TildeEquals), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Gt => + { + switch (ch) + { + '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtGt, TokenizerState.GtGt), + '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtEquals, TokenizerState.GtEquals), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr,
tokenNumeral, state, allocator), + } + }, + TokenizerState.Lt => + { + switch(ch) + { + '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtLt, TokenizerState.LtLt), + '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtEquals, TokenizerState.LtEquals), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Slash => + { + switch(ch) + { + '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SlashSlash, TokenizerState.SlashSlash), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Dot => + { + switch(ch) + { + '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDot, TokenizerState.DotDot), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.DotDot => + { + switch(ch) + { + '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDotDot, TokenizerState.DotDotDot), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Colon => + { + switch(ch) + { + ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.ColonColon, TokenizerState.ColonColon), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Equals => + { + switch(ch) + { + '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.EqualsEquals, TokenizerState.EqualsEquals), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.Minus => + { + switch(ch) + { + '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, null, TokenizerState.SmallCommentStart), + else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator), + } + }, + TokenizerState.SmallCommentStart => + { + switch(ch) + { + '[' => + { + tokenType.* = null; + state.* = TokenizerState.BigCommentLongBracketStart; + }, + '\n' => + { + state.* = TokenizerState.Start; + lastIndex.* = null; + }, + else => + { + state.* = TokenizerState.SmallComment; + }, + } + }, + TokenizerState.SmallComment => + { + switch(ch) + { + '\n' => + { + state.* = TokenizerState.Start; + lastIndex.* = null; + }, + else => { } + } + }, + TokenizerState.BigCommentLongBracketStart => + { + switch(ch) + { + '=' => + { + longBracketLevel.* += 1; + }, + '[' => + { + state.* = TokenizerState.BigComment; + }, + else => return error.LongBracketMalformedStartBigComment, + } + }, + TokenizerState.BigComment => + { + switch(ch) + { + ']' => + { + state.* = TokenizerState.BigCommentLongBracketEnd; + }, + else => { }, + } + }, + TokenizerState.BigCommentLongBracketEnd => + { + switch(ch) + { + '=' => + { + if(longBracketLevel.* == 0) + { + return error.LongBracketLevelTooBigEndBigComment; + } + longBracketLevel.* -= 1; + }, + ']' => + { + if(longBracketLevel.* != 0) + { + return error.LongBracketLevelTooSmallEndBigComment; + } + state.* = TokenizerState.Start; + }, + else => return error.LongBracketMalformedSmallEndBigComment, + } + }, + TokenizerState.A => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.An, tokenStr, ch), + else => try 
tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.An => + { + switch(ch) + { + 'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.And, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.And => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.And, allocator), + TokenizerState.W => + { + switch(ch) + { + 'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Wh, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Wh => + { + switch(ch) + { + 'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whi, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Whi => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whil, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Whil => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.While, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.While => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.While, allocator), + TokenizerState.B => + { + switch(ch) + { + 'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Br, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Br => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Bre, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Bre => + { + switch(ch) + { + 'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Brea, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Brea => + { + switch(ch) + { + 'k' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Break, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Break => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Break, allocator), + TokenizerState.G => + { + switch(ch) + { + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Go, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, 
allocator), + } + }, + TokenizerState.Go => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Got, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Got => + { + switch(ch) + { + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Goto, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Goto => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Goto, allocator), + TokenizerState.R => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Re, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Re => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ret, tokenStr, ch), + 'p' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Rep, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Ret => + { + switch(ch) + { + 'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retu, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Retu => + { + switch(ch) + { + 'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retur, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Retur => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Return, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Return => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Return, allocator), + TokenizerState.Rep => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repe, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Repe => + { + switch(ch) + { + 'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repea, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Repea => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repeat, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Repeat => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, 
TokenType.Repeat, allocator), + TokenizerState.N => + { + switch(ch) + { + 'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ni, tokenStr, ch), + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.No, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.No => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Not, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Not => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Not, allocator), + TokenizerState.Ni => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Nil, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Nil => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Nil, allocator), + TokenizerState.T => + { + switch(ch) + { + 'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Th, tokenStr, ch), + 'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tr, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Th => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.The, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.The => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Then, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Then => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Then, allocator), + TokenizerState.Tr => + { + switch(ch) + { + 'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tru, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Tru => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.True, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.True => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.True, allocator), + TokenizerState.E => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.El, tokenStr, ch), + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.En, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, 
tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.En => + { + switch(ch) + { + 'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.End, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.End => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.End, allocator), + TokenizerState.El => + { + switch(ch) + { + 's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Els, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Els => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Else, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Else => + { + switch(ch) + { + 'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elsei, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Else, allocator), + } + }, + TokenizerState.Elsei => + { + switch(ch) + { + 'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elseif, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Elseif => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Elseif, allocator), + TokenizerState.O => + { + switch(ch) + { + 'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Or, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Or => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Or, allocator), + TokenizerState.D => + { + switch(ch) + { + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Do, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Do => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Do, allocator), + TokenizerState.I => + { + switch(ch) + { + 'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.If, tokenStr, ch), + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.In, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.In => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.In, allocator), + TokenizerState.If => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.If, allocator), + TokenizerState.F => + { + switch(ch) + { + 'a' => try tokenizeTerminalNoToken(lastIndex, 
index.*, state, TokenizerState.Fa, tokenStr, ch), + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fo, tokenStr, ch), + 'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fu, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Fu => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fun, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Fun => + { + switch(ch) + { + 'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Func, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Func => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Funct, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Funct => + { + switch(ch) + { + 'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functi, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Functi => + { + switch(ch) + { + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functio, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Functio => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Function, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Function => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Function, allocator), + TokenizerState.Fa => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fal, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Fal => + { + switch(ch) + { + 's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fals, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Fals => + { + switch(ch) + { + 'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.False, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.False => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.False, allocator), + TokenizerState.Fo => + { + switch(ch) + { + 'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.For, tokenStr, 
ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.For => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.For, allocator), + TokenizerState.L => + { + switch(ch) + { + 'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Lo, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Lo => + { + switch(ch) + { + 'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loc, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Loc => + { + switch(ch) + { + 'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loca, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Loca => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Local, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Local => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Local, allocator), + TokenizerState.U => + { + switch(ch) + { + 'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Un, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Un => + { + switch(ch) + { + 't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unt, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Unt => + { + switch(ch) + { + 'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unti, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Unti => + { + switch(ch) + { + 'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Until, tokenStr, ch), + else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator), + } + }, + TokenizerState.Until => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Until, allocator), + else => + { + std.debug.print("{}\n", . 
{state.*}); + return error.NotImplemented; + } + } +} + +pub fn tokenize(file_content: []u8, allocator: std.mem.Allocator) ![]Token +{ + var tokens = std.ArrayList(Token).init(allocator); + var state: TokenizerState = TokenizerState.Start; + var lastIndex: ?usize = null; + var index: usize = 0; + var tokenType: ?TokenType = null; + var tokenStr = std.ArrayList(u8).init(allocator); + defer tokenStr.deinit(); + var tokenNumeral: ?types.Numeral = null; + var longBracketLevel: u32 = 0; + + while(index < file_content.len) + { + const ch = file_content[index]; + try tokenizeChar(&state, ch, &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, allocator); + index += 1; + } + return tokens.toOwnedSlice(); +} diff --git a/src/types.zig b/src/types.zig new file mode 100644 index 0000000..411c6ca --- /dev/null +++ b/src/types.zig @@ -0,0 +1,5 @@ +pub const Numeral = union(enum) +{ + Integer: i64, + Float: f64, +}; diff --git a/test/test.lua b/test/test.lua new file mode 100644 index 0000000..5ec0b63 --- /dev/null +++ b/test/test.lua @@ -0,0 +1 @@ +local t=(string.find(originalField.af,'m') and originalField.tableAction) or c.tableAction or originalField.tableAction or tableActionGeneric
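A note on the tokenizer's flushing behavior: a token is only emitted when the state machine backtracks on the character that follows it, and `tokenize` performs no end-of-input flush, so input that ends exactly on a token boundary (for example, a file without a trailing newline) currently drops its final token. The sketch below shows how the tokenizer could be exercised from a unit test. It is illustrative rather than part of the commit: it assumes a test file that can import the `tokenizer` module registered in build.zig, the source string and expected tokens are invented for the example, and the trailing newline is deliberate so the last numeral is flushed.

const std = @import("std");
const tokenize = @import("tokenizer").tokenize;

test "tokenizes a local assignment"
{
    const allocator = std.testing.allocator;
    // Trailing newline so the final numeral gets backtracked into a token.
    var source = "local answer = 42\n".*;
    const tokens = try tokenize(&source, allocator);
    defer
    {
        for(tokens) |token|
        {
            switch(token.tokenData)
            {
                // Name and StringLiteral tokens own an allocated copy of their text.
                .string => |data| allocator.free(data),
                else => {}
            }
        }
        allocator.free(tokens);
    }
    // Expected: Local, Name("answer"), Equals, Numeral(42).
    try std.testing.expectEqual(@as(usize, 4), tokens.len);
    try std.testing.expectEqualStrings("answer", tokens[1].tokenData.string);
    try std.testing.expectEqual(@as(i64, 42), tokens[3].tokenData.numeral.Integer);
}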