Add tokenizer

0x4261756D 2023-09-15 11:07:50 +02:00
commit 721383a043
6 changed files with 1328 additions and 0 deletions

.gitignore vendored Normal file (+2)

@@ -0,0 +1,2 @@
zig-out/
zig-cache/

build.zig Normal file (+77)

@@ -0,0 +1,77 @@
const std = @import("std");
// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
// Standard target options allow the person running `zig build` to choose
// what target to build for. Here we do not override the defaults, which
// means any target is allowed, and the default is native. Other options
// for restricting the supported target set are available.
const target = b.standardTargetOptions(.{});
// Standard optimization options allow the person running `zig build` to select
// between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
// set a preferred release mode, allowing the user to decide how to optimize.
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "luaaaaah_zig",
// In this case the main source file is merely a path; however, in more
// complicated build scripts, this could be a generated file.
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
.optimize = optimize,
});
exe.addModule("types", b.addModule("types", .{
.source_file = .{ .path = "src/types.zig" }
}));
exe.addModule("tokenizer", b.addModule("tokenizer", .{
.source_file = .{ .path = "src/tokenizer.zig" }
}));
// This declares intent for the executable to be installed into the
// standard location when the user invokes the "install" step (the default
// step when running `zig build`).
b.installArtifact(exe);
// This *creates* a Run step in the build graph, to be executed when another
// step is evaluated that depends on it. The next line below will establish
// such a dependency.
const run_cmd = b.addRunArtifact(exe);
// By making the run step depend on the install step, it will be run from the
// installation directory rather than directly from within the cache directory.
// This is not necessary; however, if the application depends on other installed
// files, this ensures they will be present and in the expected location.
run_cmd.step.dependOn(b.getInstallStep());
// This allows the user to pass arguments to the application in the build
// command itself, like this: `zig build run -- arg1 arg2 etc`
if (b.args) |args| {
run_cmd.addArgs(args);
}
// This creates a build step. It will be visible in the `zig build --help` menu,
// and can be selected like this: `zig build run`
// This will evaluate the `run` step rather than the default, which is "install".
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// Creates a step for unit testing. This only builds the test executable
// but does not run it.
const unit_tests = b.addTest(.{
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
.optimize = optimize,
});
const run_unit_tests = b.addRunArtifact(unit_tests);
// Similar to creating the run step earlier, this exposes a `test` step to
// the `zig build --help` menu, providing a way for the user to request
// running the unit tests.
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_unit_tests.step);
}
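With this build script in place, the tokenizer can be tried out on the bundled fixture via `zig build run -- test/test.lua`, and the unit tests via `zig build test`.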

src/main.zig Normal file (+44)

@@ -0,0 +1,44 @@
const std = @import("std");
const tokenize = @import("tokenizer").tokenize;
pub fn main() !void
{
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
const args = try std.process.argsAlloc(allocator);
defer std.process.argsFree(allocator, args);
const file = try std.fs.cwd().openFile(args[1], .{});
defer file.close();
const content = try file.readToEndAlloc(allocator, 13000);
defer allocator.free(content);
const tokens = try tokenize(content, allocator);
//std.debug.print("tokens: {any}", .{tokens});
for(tokens) |token|
{
switch(token.tokenData)
{
.string => |*data| std.debug.print("string: {s} {*}\n", .{data.*, data.ptr}),
.numeral => |*data| std.debug.print("numeral: {any} {*}\n", .{data.*, data}),
.none => |*data| std.debug.print("none {*}\n", .{data})
}
}
defer
{
for(tokens) |token|
{
switch(token.tokenData)
{
.string => |data| allocator.free(data),
else => {}
}
}
allocator.free(tokens);
}
}

src/tokenizer.zig Normal file (+1199)

@@ -0,0 +1,1199 @@
const types = @import("types.zig");
const std = @import("std");
const TokenType = enum
{
Name,
And, Break, Do, Else, Elseif, End,
False, For, Function, Goto, If, In,
Local, Nil, Not, Or, Repeat, Return,
Then, True, Until, While,
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
Numeral,
StringLiteral,
};
const TokenData = union(enum)
{
string: []u8,
numeral: types.Numeral,
none,
};
const Token = struct
{
tokenType: TokenType,
tokenData: TokenData,
};
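// States of the hand-written DFA below. Keyword prefixes (A, An, And, ...)
// get their own states so keywords can be told apart from plain names one
// character at a time, without lookahead.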
const TokenizerState = enum
{
Start,
Quote, SingleQuote, Name, Number, Zero,
A, B, D, E, F, G, I, L, N, O, R, T, U, W,
Plus, Minus, Star, Slash, Percent, Caret, Hash,
Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
Colon, Semicolon, Comma, Dot,
An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,
And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
BigCommentLongBracketStart, SmallComment,
Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
BigComment, BigCommentLongBracketEnd,
Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,
Elseif, Functi, Repeat, Return,
Functio,
Function,
};
fn tokenizeUpdateIndexAndState(lastIndex: *?usize, index: ?usize, state: *TokenizerState, newState: TokenizerState) void
{
lastIndex.* = index;
state.* = newState;
}
fn tokenizeTerminalBase(lastIndex: *?usize, index: ?usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState) void
{
tokenizeUpdateIndexAndState(lastIndex, index, state, newState);
tokenType.* = newTokenType;
}
fn tokenizeTerminalStr(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void
{
tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState);
try tokenStr.append(ch);
}
fn tokenizeTerminalIntNum(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: TokenType, newState: TokenizerState, tokenNumeral: *?types.Numeral, ch: u8) !void
{
tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState);
if(!std.ascii.isDigit(ch))
{
return error.NoDigit;
}
const digitValue = @as(i64, ch - '0');
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
}
else
{
switch(tokenNumeral.*.?)
{
.Integer => |*n| n.* = n.* * 10 + digitValue,
.Float => return error.ExpectedIntGotFloat
}
}
}
fn tokenizeTerminalNoToken(lastIndex: *?usize, index: usize, state: *TokenizerState, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void
{
tokenizeUpdateIndexAndState(lastIndex, index, state, newState);
try tokenStr.*.append(ch);
}
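// "Backtracking" here means: the current character can no longer extend the
// token in progress, so emit the token that was complete as of lastIndex,
// rewind index to lastIndex so the character is re-read from the Start state,
// and clear the scratch buffers.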
fn tokenizeBacktrack(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, allocator: std.mem.Allocator) !void
{
try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, tokenType.*.?, allocator);
}
fn tokenizeBacktrackCustomToken(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, newTokenType: TokenType, allocator: std.mem.Allocator) !void
{
if(lastIndex.* == null or tokenType.* == null)
{
return error.LexError;
}
if(newTokenType == TokenType.StringLiteral or newTokenType == TokenType.Name)
{
const content = try allocator.dupe(u8, tokenStr.*.items);
try tokens.append(Token { .tokenType = newTokenType, .tokenData = TokenData { .string = content } });
}
else
{
try tokens.append(Token { .tokenType = newTokenType, .tokenData = if(tokenType.*.? == TokenType.Numeral) TokenData { .numeral = tokenNumeral.*.? }
else TokenData.none
});
}
tokenNumeral.* = null;
index.* = lastIndex.*.?;
tokenStr.*.clearAndFree();
tokenizeTerminalBase(lastIndex, null, tokenType, state, null, TokenizerState.Start);
}
fn tokenizeAlphanumericNonstart(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, ch: u8, newTokenType: TokenType, allocator: std.mem.Allocator) !void
{
if(std.ascii.isAlphanumeric(ch) or ch == '_')
{
try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch);
}
else
{
try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, newTokenType, allocator);
}
}
fn tokenizeChar(state: *TokenizerState, ch: u8, lastIndex: *?usize, index: *usize, tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, tokens: *std.ArrayList(Token), longBracketLevel: *u32, allocator: std.mem.Allocator) !void
{
switch(state.*)
{
TokenizerState.Start =>
{
switch(ch)
{
'-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Minus, TokenizerState.Minus),
',' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Comma, TokenizerState.Comma),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Equals, TokenizerState.Equals),
'(' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundOpen, TokenizerState.RoundOpen),
')' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundClosed, TokenizerState.RoundClosed),
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Dot, TokenizerState.Dot),
':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Colon, TokenizerState.Colon),
'{' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyOpen, TokenizerState.CurlyOpen),
'}' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyClosed, TokenizerState.CurlyClosed),
'[' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareOpen, TokenizerState.SquareOpen),
']' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareClosed, TokenizerState.SquareClosed),
'+' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Plus, TokenizerState.Plus),
'~' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Tilde, TokenizerState.Tilde),
'>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Gt, TokenizerState.Gt),
'<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Lt, TokenizerState.Lt),
'#' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Hash, TokenizerState.Hash),
'|' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Pipe, TokenizerState.Pipe),
'&' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Ampersand, TokenizerState.Ampersand),
'%' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Percent, TokenizerState.Percent),
'*' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Star, TokenizerState.Star),
'/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Slash, TokenizerState.Slash),
';' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Semicolon, TokenizerState.Semicolon),
'^' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Caret, TokenizerState.Caret),
'a' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.A, tokenStr, ch),
'b' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.B, tokenStr, ch),
'd' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.D, tokenStr, ch),
'e' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.E, tokenStr, ch),
'f' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.F, tokenStr, ch),
'i' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.I, tokenStr, ch),
'g' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.G, tokenStr, ch),
'l' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.L, tokenStr, ch),
'n' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.N, tokenStr, ch),
'o' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.O, tokenStr, ch),
'r' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.R, tokenStr, ch),
't' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.T, tokenStr, ch),
'u' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.U, tokenStr, ch),
'w' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.W, tokenStr, ch),
'0' => try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch),
'"' =>
{
tokenType.* = null;
state.* = TokenizerState.Quote;
},
'\'' =>
{
tokenType.* = null;
state.* = TokenizerState.SingleQuote;
},
else =>
{
if(std.ascii.isWhitespace(ch))
{
}
else if(std.ascii.isAlphabetic(ch) or ch == '_')
{
try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch);
}
else if(std.ascii.isDigit(ch))
{
try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Number, tokenNumeral, ch);
}
else
{
std.debug.print("{}: {c}\n", .{state.*, ch});
return error.NotImplemented;
}
}
}
},
TokenizerState.Quote =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.QuoteBackslash,
'"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
else => try tokenStr.*.append(ch),
}
},
TokenizerState.QuoteBackslash =>
{
switch(ch)
{
'a' =>
{
try tokenStr.append('\u{0007}');
state.* = TokenizerState.Quote;
},
'b' =>
{
try tokenStr.append('\u{0008}');
state.* = TokenizerState.Quote;
},
't' =>
{
try tokenStr.append('\t');
state.* = TokenizerState.Quote;
},
'n', '\n' =>
{
try tokenStr.append('\n');
state.* = TokenizerState.Quote;
},
'v' =>
{
try tokenStr.append('\u{000b}');
state.* = TokenizerState.Quote;
},
'f' =>
{
try tokenStr.append('\u{000c}');
state.* = TokenizerState.Quote;
},
'r' =>
{
try tokenStr.append('\r');
state.* = TokenizerState.Quote;
},
'\\' =>
{
try tokenStr.append('\\');
state.* = TokenizerState.Quote;
},
'"' =>
{
try tokenStr.append('\"');
state.* = TokenizerState.Quote;
},
'\'' =>
{
try tokenStr.append('\'');
state.* = TokenizerState.Quote;
},
'z' =>
{
state.* = TokenizerState.QuoteBackslashZ;
},
else => return error.UnknownEscapeSequence,
}
},
TokenizerState.QuoteBackslashZ =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.QuoteBackslash,
'"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
else =>
{
if(!std.ascii.isWhitespace(ch))
{
try tokenStr.append(ch);
state.* = TokenizerState.Quote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
}
},
TokenizerState.SingleQuote =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.SingleQuoteBackslash,
'\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
else => try tokenStr.append(ch),
}
},
TokenizerState.SingleQuoteBackslash =>
{
switch(ch)
{
'a' =>
{
try tokenStr.append('\u{0007}');
state.* = TokenizerState.SingleQuote;
},
'b' =>
{
try tokenStr.append('\u{0008}');
state.* = TokenizerState.SingleQuote;
},
't' =>
{
try tokenStr.append('\t');
state.* = TokenizerState.SingleQuote;
},
'n', '\n' =>
{
try tokenStr.append('\n');
state.* = TokenizerState.SingleQuote;
},
'v' =>
{
try tokenStr.append('\u{000b}');
state.* = TokenizerState.SingleQuote;
},
'f' =>
{
try tokenStr.append('\u{000c}');
state.* = TokenizerState.SingleQuote;
},
'r' =>
{
try tokenStr.append('\r');
state.* = TokenizerState.SingleQuote;
},
'\\' =>
{
try tokenStr.append('\\');
state.* = TokenizerState.SingleQuote;
},
'"' =>
{
try tokenStr.append('\"');
state.* = TokenizerState.SingleQuote;
},
'\'' =>
{
try tokenStr.append('\'');
state.* = TokenizerState.SingleQuote;
},
'z' =>
{
state.* = TokenizerState.SingleQuoteBackslashZ;
},
else => return error.UnknownEscapeSequence,
}
},
TokenizerState.SingleQuoteBackslashZ =>
{
switch(ch)
{
'\\' => state.* = TokenizerState.SingleQuoteBackslash,
'\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
else =>
{
if(!std.ascii.isWhitespace(ch))
{
try tokenStr.append(ch);
state.* = TokenizerState.SingleQuote;
}
else
{
// Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
// "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
}
}
}
},
TokenizerState.String => try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, TokenType.StringLiteral, allocator),
TokenizerState.Name => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
TokenizerState.Zero =>
{
switch(ch)
{
'x' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.HexNumberX;
},
'.' => return error.NotImplemented,
else =>
{
if(std.ascii.isDigit(ch))
{
const digitValue = @as(i64, ch - '0');
lastIndex.* = index.*;
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
tokenType.* = TokenType.Numeral;
}
else
{
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
}
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
}
}
}
},
TokenizerState.HexNumberX =>
{
if(std.ascii.isHex(ch))
{
lastIndex.* = index.*;
tokenType.* = TokenType.Numeral;
// First hex digit: initialize the value and move on to the HexNumber state.
state.* = TokenizerState.HexNumber;
if(std.ascii.isDigit(ch))
{
tokenNumeral.* = types.Numeral { .Integer = @as(i64, ch - '0') };
}
else
{
// Letters 'a'..'f' stand for the digit values 10..15.
tokenNumeral.* = types.Numeral { .Integer = @as(i64, std.ascii.toLower(ch) - 'a' + 10) };
}
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
}
},
TokenizerState.HexNumber =>
{
switch(ch)
{
'p' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.HexExpNumber;
},
else =>
{
if(std.ascii.isHex(ch))
{
lastIndex.* = index.*;
tokenType.* = TokenType.Numeral;
// Accumulate into the running hex value instead of overwriting it;
// letters 'a'..'f' stand for the digit values 10..15.
const digitValue: i64 = if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10;
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 16 + digitValue;
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
}
}
}
},
TokenizerState.Number =>
{
switch(ch)
{
'e' =>
{
try tokenStr.*.append(ch);
tokenType.* = null;
state.* = TokenizerState.ExpNumber;
},
'.' => return error.NotImplemented,
else =>
{
if(std.ascii.isDigit(ch))
{
const digitValue = @as(i64, ch - '0');
lastIndex.* = index.*;
if(tokenNumeral.* == null)
{
tokenNumeral.* = types.Numeral { .Integer = digitValue };
tokenType.* = TokenType.Numeral;
}
else
{
tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
}
}
else
{
try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
}
}
}
},
TokenizerState.Comma, TokenizerState.RoundOpen, TokenizerState.RoundClosed,
TokenizerState.CurlyOpen, TokenizerState.CurlyClosed, TokenizerState.Plus,
TokenizerState.TildeEquals, TokenizerState.EqualsEquals, TokenizerState.Hash,
TokenizerState.GtEquals, TokenizerState.LtEquals, TokenizerState.SquareOpen,
TokenizerState.SquareClosed, TokenizerState.Pipe, TokenizerState.Ampersand,
TokenizerState.Percent, TokenizerState.Star, TokenizerState.Semicolon,
TokenizerState.Caret, TokenizerState.DotDotDot, TokenizerState.GtGt,
TokenizerState.LtLt, TokenizerState.SlashSlash => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
TokenizerState.Tilde =>
{
switch(ch)
{
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.TildeEquals, TokenizerState.TildeEquals),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Gt =>
{
switch (ch)
{
'>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtGt, TokenizerState.GtGt),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtEquals, TokenizerState.GtEquals),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Lt =>
{
switch(ch)
{
'<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtLt, TokenizerState.LtLt),
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtEquals, TokenizerState.LtEquals),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Slash =>
{
switch(ch)
{
'/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SlashSlash, TokenizerState.SlashSlash),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Dot =>
{
switch(ch)
{
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDot, TokenizerState.DotDot),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.DotDot =>
{
switch(ch)
{
'.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDotDot, TokenizerState.DotDotDot),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Colon =>
{
switch(ch)
{
':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.ColonColon, TokenizerState.ColonColon),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Equals =>
{
switch(ch)
{
'=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.EqualsEquals, TokenizerState.EqualsEquals),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.Minus =>
{
switch(ch)
{
'-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, null, TokenizerState.SmallCommentStart),
else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
}
},
TokenizerState.SmallCommentStart =>
{
switch(ch)
{
'[' =>
{
tokenType.* = null;
state.* = TokenizerState.BigCommentLongBracketStart;
},
'\n' =>
{
state.* = TokenizerState.Start;
lastIndex.* = null;
},
else =>
{
state.* = TokenizerState.SmallComment;
},
}
},
TokenizerState.SmallComment =>
{
switch(ch)
{
'\n' =>
{
state.* = TokenizerState.Start;
lastIndex.* = null;
},
else => { }
}
},
TokenizerState.BigCommentLongBracketStart =>
{
switch(ch)
{
'=' =>
{
longBracketLevel.* += 1;
},
'[' =>
{
state.* = TokenizerState.BigComment;
},
else => return error.LongBracketMalformedStartBigComment,
}
},
TokenizerState.BigComment =>
{
switch(ch)
{
']' =>
{
state.* = TokenizerState.BigCommentLongBracketEnd;
},
else => { },
}
},
TokenizerState.BigCommentLongBracketEnd =>
{
switch(ch)
{
'=' =>
{
if(longBracketLevel.* == 0)
{
return error.LongBracketLevelTooBigEndBigComment;
}
longBracketLevel.* -= 1;
},
']' =>
{
if(longBracketLevel.* != 0)
{
return error.LongBracketLevelTooSmallEndBigComment;
}
state.* = TokenizerState.Start;
},
else => return error.LongBracketMalformedSmallEndBigComment,
}
},
TokenizerState.A =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.An, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.An =>
{
switch(ch)
{
'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.And, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.And => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.And, allocator),
TokenizerState.W =>
{
switch(ch)
{
'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Wh, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Wh =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whi, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Whi =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whil, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Whil =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.While, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.While => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.While, allocator),
TokenizerState.B =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Br, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Br =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Bre, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Bre =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Brea, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Brea =>
{
switch(ch)
{
'k' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Break, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Break => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Break, allocator),
TokenizerState.G =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Go, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Go =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Got, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Got =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Goto, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Goto => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Goto, allocator),
TokenizerState.R =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Re, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Re =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ret, tokenStr, ch),
'p' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Rep, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Ret =>
{
switch(ch)
{
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retu, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Retu =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retur, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Retur =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Return, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Return => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Return, allocator),
TokenizerState.Rep =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repe, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Repe =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repea, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Repea =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repeat, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Repeat => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Repeat, allocator),
TokenizerState.N =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ni, tokenStr, ch),
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.No, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.No =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Not, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Not => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Not, allocator),
TokenizerState.Ni =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Nil, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Nil => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Nil, allocator),
TokenizerState.T =>
{
switch(ch)
{
'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Th, tokenStr, ch),
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tr, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Th =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.The, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.The =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Then, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Then => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Then, allocator),
TokenizerState.Tr =>
{
switch(ch)
{
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tru, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Tru =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.True, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.True => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.True, allocator),
TokenizerState.E =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.El, tokenStr, ch),
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.En, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.En =>
{
switch(ch)
{
'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.End, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.End => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.End, allocator),
TokenizerState.El =>
{
switch(ch)
{
's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Els, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Els =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Else, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Else =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elsei, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Else, allocator),
}
},
TokenizerState.Elsei =>
{
switch(ch)
{
'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elseif, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Elseif => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Elseif, allocator),
TokenizerState.O =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Or, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Or => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Or, allocator),
TokenizerState.D =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Do, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Do => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Do, allocator),
TokenizerState.I =>
{
switch(ch)
{
'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.If, tokenStr, ch),
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.In, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.In => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.In, allocator),
TokenizerState.If => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.If, allocator),
TokenizerState.F =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fa, tokenStr, ch),
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fo, tokenStr, ch),
'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fu, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Fu =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fun, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Fun =>
{
switch(ch)
{
'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Func, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Func =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Funct, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Funct =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functi, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Functi =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functio, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Functio =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Function, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Function => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Function, allocator),
TokenizerState.Fa =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fal, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Fal =>
{
switch(ch)
{
's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fals, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Fals =>
{
switch(ch)
{
'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.False, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.False => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.False, allocator),
TokenizerState.Fo =>
{
switch(ch)
{
'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.For, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.For => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.For, allocator),
TokenizerState.L =>
{
switch(ch)
{
'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Lo, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Lo =>
{
switch(ch)
{
'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loc, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Loc =>
{
switch(ch)
{
'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loca, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Loca =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Local, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Local => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Local, allocator),
TokenizerState.U =>
{
switch(ch)
{
'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Un, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Un =>
{
switch(ch)
{
't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unt, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Unt =>
{
switch(ch)
{
'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unti, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Unti =>
{
switch(ch)
{
'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Until, tokenStr, ch),
else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
}
},
TokenizerState.Until => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Until, allocator),
else =>
{
std.debug.print("{}\n", . {state.*});
return error.NotImplemented;
}
}
}
pub fn tokenize(file_content: []u8, allocator: std.mem.Allocator) ![]Token
{
var tokens = std.ArrayList(Token).init(allocator);
var state: TokenizerState = TokenizerState.Start;
var lastIndex: ?usize = null;
var index: usize = 0;
var tokenType: ?TokenType = null;
var tokenStr = std.ArrayList(u8).init(allocator);
defer tokenStr.deinit();
var tokenNumeral: ?types.Numeral = null;
var longBracketLevel: u32 = 0;
while(index < file_content.len)
{
const ch = file_content[index];
try tokenizeChar(&state, ch, &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, allocator);
index += 1;
}
// Flush the token that is still in progress when the input ends,
// otherwise the final token of a file would be silently dropped.
if(state != TokenizerState.Start)
{
try tokenizeBacktrack(&lastIndex, &index, &tokens, &tokenType, &tokenStr, &tokenNumeral, &state, allocator);
}
return tokens.toOwnedSlice();
}
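As a quick sanity check, here is a minimal test sketch (not part of this commit) that could be appended to src/tokenizer.zig; it assumes the end-of-input flush above and frees the string payloads the same way main.zig does:

test "tokenize a local assignment"
{
    const allocator = std.testing.allocator;
    // tokenize takes a mutable slice, so copy the literal into a var array.
    var source = "local x = 10;".*;
    const tokens = try tokenize(&source, allocator);
    defer
    {
        for(tokens) |token|
        {
            switch(token.tokenData)
            {
                .string => |data| allocator.free(data),
                else => {}
            }
        }
        allocator.free(tokens);
    }
    try std.testing.expectEqual(TokenType.Local, tokens[0].tokenType);
    try std.testing.expectEqual(TokenType.Name, tokens[1].tokenType);
    try std.testing.expectEqual(TokenType.Equals, tokens[2].tokenType);
    try std.testing.expectEqual(TokenType.Numeral, tokens[3].tokenType);
    try std.testing.expectEqual(@as(i64, 10), tokens[3].tokenData.numeral.Integer);
}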

src/types.zig Normal file (+5)

@@ -0,0 +1,5 @@
pub const Numeral = union(enum)
{
Integer: i64,
Float: f64,
};
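For illustration, a small sketch of constructing and inspecting a Numeral; the describe helper is hypothetical, not part of the codebase:

const std = @import("std");
const Numeral = @import("types.zig").Numeral;

// Hypothetical helper: print which variant a Numeral holds.
fn describe(n: Numeral) void
{
    switch(n)
    {
        .Integer => |i| std.debug.print("integer: {}\n", .{i}),
        .Float => |f| std.debug.print("float: {}\n", .{f}),
    }
}

// describe(Numeral { .Integer = 42 }); // prints "integer: 42"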

test/test.lua Normal file (+1)

@@ -0,0 +1 @@
local t=(string.find(originalField.af,'m') and originalField.tableAction) or c.tableAction or originalField.tableAction or tableActionGeneric