Add tokenizer

commit 721383a043

.gitignore (vendored, new file)
@@ -0,0 +1,2 @@
zig-out/
zig-cache/
build.zig (new file)
@@ -0,0 +1,77 @@
const std = @import("std");

// Although this function looks imperative, note that its job is to
// declaratively construct a build graph that will be executed by an external
// runner.
pub fn build(b: *std.Build) void {
    // Standard target options allow the person running `zig build` to choose
    // what target to build for. Here we do not override the defaults, which
    // means any target is allowed, and the default is native. Other options
    // for restricting the supported target set are available.
    const target = b.standardTargetOptions(.{});

    // Standard optimization options allow the person running `zig build` to select
    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
    // set a preferred release mode, allowing the user to decide how to optimize.
    const optimize = b.standardOptimizeOption(.{});

    const exe = b.addExecutable(.{
        .name = "luaaaaah_zig",
        // In this case the main source file is merely a path; however, in more
        // complicated build scripts, this could be a generated file.
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });

    exe.addModule("types", b.addModule("types", .{
        .source_file = .{ .path = "src/types.zig" },
    }));
    exe.addModule("tokenizer", b.addModule("tokenizer", .{
        .source_file = .{ .path = "src/tokenizer.zig" },
    }));

    // This declares intent for the executable to be installed into the
    // standard location when the user invokes the "install" step (the default
    // step when running `zig build`).
    b.installArtifact(exe);

    // This *creates* a Run step in the build graph, to be executed when another
    // step is evaluated that depends on it. The next line below will establish
    // such a dependency.
    const run_cmd = b.addRunArtifact(exe);

    // By making the run step depend on the install step, it will be run from the
    // installation directory rather than directly from within the cache directory.
    // This is not necessary; however, if the application depends on other installed
    // files, this ensures they will be present and in the expected location.
    run_cmd.step.dependOn(b.getInstallStep());

    // This allows the user to pass arguments to the application in the build
    // command itself, like this: `zig build run -- arg1 arg2 etc`
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    // This creates a build step. It will be visible in the `zig build --help` menu,
    // and can be selected like this: `zig build run`
    // This will evaluate the `run` step rather than the default, which is "install".
    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);

    // Creates a step for unit testing. This only builds the test executable
    // but does not run it.
    const unit_tests = b.addTest(.{
        .root_source_file = .{ .path = "src/main.zig" },
        .target = target,
        .optimize = optimize,
    });

    const run_unit_tests = b.addRunArtifact(unit_tests);

    // Similar to creating the run step earlier, this exposes a `test` step to
    // the `zig build --help` menu, providing a way for the user to request
    // running the unit tests.
    const test_step = b.step("test", "Run unit tests");
    test_step.dependOn(&run_unit_tests.step);
}
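(With the test script added at the end of this commit, the run step described above can be exercised as, for example, `zig build run -- test/test.lua`; the exact invocation is illustrative.)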
src/main.zig (new file)
@@ -0,0 +1,44 @@
const std = @import("std");
const tokenize = @import("tokenizer").tokenize;

pub fn main() !void
{
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);
    const file = try std.fs.cwd().openFile(args[1], .{});
    defer file.close();
    const content = try file.readToEndAlloc(allocator, 13000);
    defer allocator.free(content);
    const tokens = try tokenize(content, allocator);
    //std.debug.print("tokens: {any}", .{tokens});
    for(tokens) |token|
    {
        switch(token.tokenData)
        {
            .string => |*data| std.debug.print("string: {s} {*}\n", .{data.*, data.ptr}),
            .numeral => |*data| std.debug.print("numeral: {any} {*}\n", .{data.*, data}),
            .none => |*data| std.debug.print("none {*}\n", .{data})
        }
    }
    // On scope exit, free the heap-allocated string payloads and the token slice.
    defer
    {
        var i: usize = 0;
        while(i < tokens.len)
        {
            switch(tokens[i].tokenData)
            {
                .string => |*data|
                {
                    allocator.free(data.*);
                },
                else => {}
            }
            i += 1;
        }
        allocator.free(tokens);
    }
}
src/tokenizer.zig (new file)
@@ -0,0 +1,1199 @@
const types = @import("types.zig");
const std = @import("std");

const TokenType = enum
{
    Name,
    And, Break, Do, Else, Elseif, End,
    False, For, Function, Goto, If, In,
    Local, Nil, Not, Or, Repeat, Return,
    Then, True, Until, While,
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, LtLt, GtGt, SlashSlash,
    EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals,
    RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon,
    Semicolon, Colon, Comma, Dot, DotDot, DotDotDot,
    Numeral,
    StringLiteral,
};

const TokenData = union(enum)
{
    string: []u8,
    numeral: types.Numeral,
    none,
};

const Token = struct
{
    tokenType: TokenType,
    tokenData: TokenData,
};

// States of the hand-written DFA. The multi-letter states (An, Bre, Whil, ...)
// track progress through keyword prefixes so that keywords and plain names
// share a single scan.
const TokenizerState = enum
{
    Start,
    Quote, SingleQuote, Name, Number, Zero,
    A, B, D, E, F, G, I, L, N, O, R, T, U, W,
    Plus, Minus, Star, Slash, Percent, Caret, Hash,
    Ampersand, Tilde, Pipe, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed,
    Colon, Semicolon, Comma, Dot,

    An, Br, Do, El, En, Fa, Fo, Fu, Go, If, In, Lo, Ni, No, Or, Re, Th, Tr, Un, Wh,
    LtLt, GtGt, SlashSlash, EqualsEquals, TildeEquals, LtEquals, GtEquals, ColonColon, DotDot,
    SmallCommentStart, QuoteBackslash, SingleQuoteBackslash, String, HexNumberX, ExpNumber,

    And, Bre, Els, End, Fal, For, Fun, Got, Loc, Nil, Not, Rep, Ret, The, Tru, Unt, Whi,
    DotDotDot, HexNumber, QuoteBackslashZ, SingleQuoteBackslashZ,
    BigCommentLongBracketStart, SmallComment,

    Brea, Else, Fals, Func, Goto, Loca, Repe, Retu, Then, True, Unti, Whil, HexExpNumber,
    BigComment, BigCommentLongBracketEnd,

    Break, Elsei, False, Funct, Local, Repea, Retur, Until, While,

    Elseif, Functi, Repeat, Return,

    Functio,

    Function,
};

// Remember how far the current token reaches and move to the next state.
fn tokenizeUpdateIndexAndState(lastIndex: *?usize, index: ?usize, state: *TokenizerState, newState: TokenizerState) void
{
    lastIndex.* = index;
    state.* = newState;
}
// Like tokenizeUpdateIndexAndState, but also records the token type reached so far.
fn tokenizeTerminalBase(lastIndex: *?usize, index: ?usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState) void
{
    tokenizeUpdateIndexAndState(lastIndex, index, state, newState);
    tokenType.* = newTokenType;
}
// Terminal transition that additionally appends the character to the token text.
fn tokenizeTerminalStr(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: ?TokenType, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void
{
    tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState);
    try tokenStr.append(ch);
}
// Terminal transition for decimal digits: starts the numeral accumulator on
// the first digit, otherwise folds the digit into it.
fn tokenizeTerminalIntNum(lastIndex: *?usize, index: usize, tokenType: *?TokenType, state: *TokenizerState, newTokenType: TokenType, newState: TokenizerState, tokenNumeral: *?types.Numeral, ch: u8) !void
{
    tokenizeTerminalBase(lastIndex, index, tokenType, state, newTokenType, newState);
    if(!std.ascii.isDigit(ch))
    {
        return error.NoDigit;
    }
    const digitValue = @as(i64, ch - '0');
    if(tokenNumeral.* == null)
    {
        tokenNumeral.* = types.Numeral { .Integer = digitValue };
    }
    else
    {
        switch(tokenNumeral.*.?)
        {
            .Integer => |*n| n.* = n.* * 10 + digitValue,
            .Float => return error.ExpectedIntGotFloat
        }
    }
}
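// A minimal sketch (hypothetical test, not part of this commit) of the
// decimal accumulation above: "123" folds as ((1*10)+2)*10+3 == 123.
test "decimal digit folding"
{
    var n: i64 = 0;
    for("123") |ch|
    {
        n = n * 10 + @as(i64, ch - '0');
    }
    try std.testing.expectEqual(@as(i64, 123), n);
}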
// Transition inside a keyword prefix: extend the token text without
// committing to a token type yet.
fn tokenizeTerminalNoToken(lastIndex: *?usize, index: usize, state: *TokenizerState, newState: TokenizerState, tokenStr: *std.ArrayList(u8), ch: u8) !void
{
    tokenizeUpdateIndexAndState(lastIndex, index, state, newState);
    try tokenStr.*.append(ch);
}
// Emit the token that ended at lastIndex and rewind so that the current
// character is rescanned from the Start state.
fn tokenizeBacktrack(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, allocator: std.mem.Allocator) !void
{
    try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, tokenType.*.?, allocator);
}
fn tokenizeBacktrackCustomToken(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, newTokenType: TokenType, allocator: std.mem.Allocator) !void
{
    if(lastIndex.* == null or tokenType.* == null)
    {
        return error.LexError;
    }
    if(newTokenType == TokenType.StringLiteral or newTokenType == TokenType.Name)
    {
        const content = try allocator.alloc(u8, tokenStr.*.items.len);
        @memcpy(content, tokenStr.*.items);
        try tokens.append(Token { .tokenType = newTokenType, .tokenData = TokenData { .string = content } });
    }
    else
    {
        try tokens.append(Token { .tokenType = newTokenType, .tokenData = if(tokenType.*.? == TokenType.Numeral) TokenData { .numeral = tokenNumeral.*.? }
            else TokenData.none
        });
    }
    tokenNumeral.* = null;
    index.* = lastIndex.*.?;
    tokenStr.*.clearAndFree();
    tokenizeTerminalBase(lastIndex, null, tokenType, state, null, TokenizerState.Start);
}
// If a keyword prefix continues as a longer identifier, fall back to Name;
// otherwise emit newTokenType (the keyword) and backtrack.
fn tokenizeAlphanumericNonstart(lastIndex: *?usize, index: *usize, tokens: *std.ArrayList(Token), tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, state: *TokenizerState, ch: u8, newTokenType: TokenType, allocator: std.mem.Allocator) !void
{
    if(std.ascii.isAlphanumeric(ch) or ch == '_')
    {
        try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch);
    }
    else
    {
        try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, newTokenType, allocator);
    }
}
// Advance the DFA by a single character; backtracking may rewind index.
fn tokenizeChar(state: *TokenizerState, ch: u8, lastIndex: *?usize, index: *usize, tokenType: *?TokenType, tokenStr: *std.ArrayList(u8), tokenNumeral: *?types.Numeral, tokens: *std.ArrayList(Token), longBracketLevel: *u32, allocator: std.mem.Allocator) !void
{
    switch(state.*)
    {
        TokenizerState.Start =>
        {
            switch(ch)
            {
                '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Minus, TokenizerState.Minus),
                ',' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Comma, TokenizerState.Comma),
                '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Equals, TokenizerState.Equals),
                '(' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundOpen, TokenizerState.RoundOpen),
                ')' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.RoundClosed, TokenizerState.RoundClosed),
                '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Dot, TokenizerState.Dot),
                ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Colon, TokenizerState.Colon),
                '{' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyOpen, TokenizerState.CurlyOpen),
                '}' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.CurlyClosed, TokenizerState.CurlyClosed),
                '[' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareOpen, TokenizerState.SquareOpen),
                ']' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SquareClosed, TokenizerState.SquareClosed),
                '+' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Plus, TokenizerState.Plus),
                '~' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Tilde, TokenizerState.Tilde),
                '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Gt, TokenizerState.Gt),
                '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Lt, TokenizerState.Lt),
                '#' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Hash, TokenizerState.Hash),
                '|' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Pipe, TokenizerState.Pipe),
                '&' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Ampersand, TokenizerState.Ampersand),
                '%' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Percent, TokenizerState.Percent),
                '*' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Star, TokenizerState.Star),
                '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Slash, TokenizerState.Slash),
                ';' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Semicolon, TokenizerState.Semicolon),
                '^' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.Caret, TokenizerState.Caret),
                'a' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.A, tokenStr, ch),
                'b' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.B, tokenStr, ch),
                'd' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.D, tokenStr, ch),
                'e' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.E, tokenStr, ch),
                'f' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.F, tokenStr, ch),
                'i' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.I, tokenStr, ch),
                'g' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.G, tokenStr, ch),
                'l' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.L, tokenStr, ch),
                'n' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.N, tokenStr, ch),
                'o' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.O, tokenStr, ch),
                'r' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.R, tokenStr, ch),
                't' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.T, tokenStr, ch),
                'u' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.U, tokenStr, ch),
                'w' => try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.W, tokenStr, ch),
                '0' => try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Zero, tokenNumeral, ch),
                '"' =>
                {
                    tokenType.* = null;
                    state.* = TokenizerState.Quote;
                },
                '\'' =>
                {
                    tokenType.* = null;
                    state.* = TokenizerState.SingleQuote;
                },
                else =>
                {
                    if(std.ascii.isWhitespace(ch))
                    {
                        // Whitespace between tokens is skipped.
                    }
                    else if(std.ascii.isAlphabetic(ch) or ch == '_')
                    {
                        try tokenizeTerminalStr(lastIndex, index.*, tokenType, state, TokenType.Name, TokenizerState.Name, tokenStr, ch);
                    }
                    else if(std.ascii.isDigit(ch))
                    {
                        try tokenizeTerminalIntNum(lastIndex, index.*, tokenType, state, TokenType.Numeral, TokenizerState.Number, tokenNumeral, ch);
                    }
                    else
                    {
                        std.debug.print("{}: {c}\n", .{state.*, ch});
                        return error.NotImplemented;
                    }
                }
            }
        },
        TokenizerState.Quote =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.QuoteBackslash,
                '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
                else => try tokenStr.append(ch),
            }
        },
        TokenizerState.QuoteBackslash =>
        {
            switch(ch)
            {
                'a' => { try tokenStr.append('\u{0007}'); state.* = TokenizerState.Quote; },
                'b' => { try tokenStr.append('\u{0008}'); state.* = TokenizerState.Quote; },
                't' => { try tokenStr.append('\t'); state.* = TokenizerState.Quote; },
                'n', '\n' => { try tokenStr.append('\n'); state.* = TokenizerState.Quote; },
                'v' => { try tokenStr.append('\u{000b}'); state.* = TokenizerState.Quote; },
                'f' => { try tokenStr.append('\u{000c}'); state.* = TokenizerState.Quote; },
                'r' => { try tokenStr.append('\r'); state.* = TokenizerState.Quote; },
                '\\' => { try tokenStr.append('\\'); state.* = TokenizerState.Quote; },
                '"' => { try tokenStr.append('\"'); state.* = TokenizerState.Quote; },
                '\'' => { try tokenStr.append('\''); state.* = TokenizerState.Quote; },
                'z' => state.* = TokenizerState.QuoteBackslashZ,
                else => return error.UnknownEscapeSequence,
            }
        },
        TokenizerState.QuoteBackslashZ =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.QuoteBackslash,
                '"' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
                else =>
                {
                    if(!std.ascii.isWhitespace(ch))
                    {
                        try tokenStr.append(ch);
                        state.* = TokenizerState.Quote;
                    }
                    else
                    {
                        // Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
                        // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
                        // (see the sketch test after tokenizeChar).
                    }
                }
            }
        },
        TokenizerState.SingleQuote =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.SingleQuoteBackslash,
                '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
                else => try tokenStr.append(ch),
            }
        },
        TokenizerState.SingleQuoteBackslash =>
        {
            switch(ch)
            {
                'a' => { try tokenStr.append('\u{0007}'); state.* = TokenizerState.SingleQuote; },
                'b' => { try tokenStr.append('\u{0008}'); state.* = TokenizerState.SingleQuote; },
                't' => { try tokenStr.append('\t'); state.* = TokenizerState.SingleQuote; },
                'n', '\n' => { try tokenStr.append('\n'); state.* = TokenizerState.SingleQuote; },
                'v' => { try tokenStr.append('\u{000b}'); state.* = TokenizerState.SingleQuote; },
                'f' => { try tokenStr.append('\u{000c}'); state.* = TokenizerState.SingleQuote; },
                'r' => { try tokenStr.append('\r'); state.* = TokenizerState.SingleQuote; },
                '\\' => { try tokenStr.append('\\'); state.* = TokenizerState.SingleQuote; },
                '"' => { try tokenStr.append('\"'); state.* = TokenizerState.SingleQuote; },
                '\'' => { try tokenStr.append('\''); state.* = TokenizerState.SingleQuote; },
                'z' => state.* = TokenizerState.SingleQuoteBackslashZ,
                else => return error.UnknownEscapeSequence,
            }
        },
        TokenizerState.SingleQuoteBackslashZ =>
        {
            switch(ch)
            {
                '\\' => state.* = TokenizerState.SingleQuoteBackslash,
                '\'' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.StringLiteral, TokenizerState.String),
                else =>
                {
                    if(!std.ascii.isWhitespace(ch))
                    {
                        try tokenStr.append(ch);
                        state.* = TokenizerState.SingleQuote;
                    }
                    else
                    {
                        // Noop, https://www.lua.org/manual/5.4/manual.html#3.1:
                        // "The escape sequence '\z' skips the following span of whitespace characters, including line breaks;"
                    }
                }
            }
        },
        TokenizerState.String => try tokenizeBacktrackCustomToken(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, TokenType.StringLiteral, allocator),
        TokenizerState.Name => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        TokenizerState.Zero =>
        {
            switch(ch)
            {
                'x' =>
                {
                    try tokenStr.*.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.HexNumberX;
                },
                '.' => return error.NotImplemented,
                else =>
                {
                    if(std.ascii.isDigit(ch))
                    {
                        const digitValue = @as(i64, ch - '0');
                        lastIndex.* = index.*;
                        state.* = TokenizerState.Number;
                        if(tokenNumeral.* == null)
                        {
                            tokenNumeral.* = types.Numeral { .Integer = digitValue };
                            tokenType.* = TokenType.Numeral;
                        }
                        else
                        {
                            tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
                        }
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
                    }
                }
            }
        },
        TokenizerState.HexNumberX =>
        {
            if(std.ascii.isHex(ch))
            {
                lastIndex.* = index.*;
                tokenType.* = TokenType.Numeral;
                state.* = TokenizerState.HexNumber;
                if(std.ascii.isDigit(ch))
                {
                    tokenNumeral.* = types.Numeral { .Integer = @as(i64, ch - '0') };
                }
                else
                {
                    tokenNumeral.* = types.Numeral { .Integer = @as(i64, std.ascii.toLower(ch) - 'a' + 10) };
                }
            }
            else
            {
                try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
            }
        },
        TokenizerState.HexNumber =>
        {
            switch(ch)
            {
                'p' =>
                {
                    try tokenStr.*.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.HexExpNumber;
                },
                else =>
                {
                    if(std.ascii.isHex(ch))
                    {
                        const digitValue = @as(i64, if(std.ascii.isDigit(ch)) ch - '0' else std.ascii.toLower(ch) - 'a' + 10);
                        lastIndex.* = index.*;
                        tokenType.* = TokenType.Numeral;
                        tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 16 + digitValue;
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
                    }
                }
            }
        },
        TokenizerState.Number =>
        {
            switch(ch)
            {
                'e' =>
                {
                    try tokenStr.*.append(ch);
                    tokenType.* = null;
                    state.* = TokenizerState.ExpNumber;
                },
                '.' => return error.NotImplemented,
                else =>
                {
                    if(std.ascii.isDigit(ch))
                    {
                        const digitValue = @as(i64, ch - '0');
                        lastIndex.* = index.*;
                        if(tokenNumeral.* == null)
                        {
                            tokenNumeral.* = types.Numeral { .Integer = digitValue };
                            tokenType.* = TokenType.Numeral;
                        }
                        else
                        {
                            tokenNumeral.*.?.Integer = tokenNumeral.*.?.Integer * 10 + digitValue;
                        }
                    }
                    else
                    {
                        try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator);
                    }
                }
            }
        },
        TokenizerState.Comma, TokenizerState.RoundOpen, TokenizerState.RoundClosed,
        TokenizerState.CurlyOpen, TokenizerState.CurlyClosed, TokenizerState.Plus,
        TokenizerState.TildeEquals, TokenizerState.EqualsEquals, TokenizerState.Hash,
        TokenizerState.GtEquals, TokenizerState.LtEquals, TokenizerState.SquareOpen,
        TokenizerState.SquareClosed, TokenizerState.Pipe, TokenizerState.Ampersand,
        TokenizerState.Percent, TokenizerState.Star, TokenizerState.Semicolon,
        TokenizerState.Caret, TokenizerState.DotDotDot, TokenizerState.GtGt,
        TokenizerState.LtLt, TokenizerState.SlashSlash => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        TokenizerState.Tilde => switch(ch)
        {
            '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.TildeEquals, TokenizerState.TildeEquals),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Gt => switch(ch)
        {
            '>' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtGt, TokenizerState.GtGt),
            '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.GtEquals, TokenizerState.GtEquals),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Lt => switch(ch)
        {
            '<' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtLt, TokenizerState.LtLt),
            '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.LtEquals, TokenizerState.LtEquals),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Slash => switch(ch)
        {
            '/' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.SlashSlash, TokenizerState.SlashSlash),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Dot => switch(ch)
        {
            '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDot, TokenizerState.DotDot),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.DotDot => switch(ch)
        {
            '.' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.DotDotDot, TokenizerState.DotDotDot),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Colon => switch(ch)
        {
            ':' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.ColonColon, TokenizerState.ColonColon),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Equals => switch(ch)
        {
            '=' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, TokenType.EqualsEquals, TokenizerState.EqualsEquals),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.Minus => switch(ch)
        {
            '-' => tokenizeTerminalBase(lastIndex, index.*, tokenType, state, null, TokenizerState.SmallCommentStart),
            else => try tokenizeBacktrack(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, allocator),
        },
        TokenizerState.SmallCommentStart => switch(ch)
        {
            '[' =>
            {
                tokenType.* = null;
                state.* = TokenizerState.BigCommentLongBracketStart;
            },
            '\n' =>
            {
                state.* = TokenizerState.Start;
                lastIndex.* = null;
            },
            else => state.* = TokenizerState.SmallComment,
        },
        TokenizerState.SmallComment => switch(ch)
        {
            '\n' =>
            {
                state.* = TokenizerState.Start;
                lastIndex.* = null;
            },
            else => { }
        },
        TokenizerState.BigCommentLongBracketStart => switch(ch)
        {
            '=' => longBracketLevel.* += 1,
            '[' => state.* = TokenizerState.BigComment,
            else => return error.LongBracketMalformedStartBigComment,
        },
        TokenizerState.BigComment => switch(ch)
        {
            ']' => state.* = TokenizerState.BigCommentLongBracketEnd,
            else => { },
        },
        TokenizerState.BigCommentLongBracketEnd => switch(ch)
        {
            '=' =>
            {
                if(longBracketLevel.* == 0)
                {
                    return error.LongBracketLevelTooBigEndBigComment;
                }
                longBracketLevel.* -= 1;
            },
            ']' =>
            {
                if(longBracketLevel.* != 0)
                {
                    return error.LongBracketLevelTooSmallEndBigComment;
                }
                state.* = TokenizerState.Start;
            },
            else => return error.LongBracketMalformedSmallEndBigComment,
        },
        TokenizerState.A => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.An, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.An => switch(ch)
        {
            'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.And, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.And => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.And, allocator),
        TokenizerState.W => switch(ch)
        {
            'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Wh, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Wh => switch(ch)
        {
            'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whi, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Whi => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Whil, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Whil => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.While, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.While => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.While, allocator),
        TokenizerState.B => switch(ch)
        {
            'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Br, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Br => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Bre, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Bre => switch(ch)
        {
            'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Brea, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Brea => switch(ch)
        {
            'k' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Break, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Break => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Break, allocator),
        TokenizerState.G => switch(ch)
        {
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Go, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Go => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Got, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Got => switch(ch)
        {
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Goto, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Goto => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Goto, allocator),
        TokenizerState.R => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Re, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Re => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ret, tokenStr, ch),
            'p' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Rep, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Ret => switch(ch)
        {
            'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retu, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Retu => switch(ch)
        {
            'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Retur, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Retur => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Return, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Return => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Return, allocator),
        TokenizerState.Rep => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repe, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Repe => switch(ch)
        {
            'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repea, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Repea => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Repeat, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Repeat => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Repeat, allocator),
        TokenizerState.N => switch(ch)
        {
            'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Ni, tokenStr, ch),
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.No, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.No => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Not, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Not => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Not, allocator),
        TokenizerState.Ni => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Nil, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Nil => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Nil, allocator),
        TokenizerState.T => switch(ch)
        {
            'h' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Th, tokenStr, ch),
            'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tr, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Th => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.The, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.The => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Then, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Then => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Then, allocator),
        TokenizerState.Tr => switch(ch)
        {
            'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Tru, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Tru => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.True, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.True => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.True, allocator),
        TokenizerState.E => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.El, tokenStr, ch),
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.En, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.En => switch(ch)
        {
            'd' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.End, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.End => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.End, allocator),
        TokenizerState.El => switch(ch)
        {
            's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Els, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Els => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Else, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Else => switch(ch)
        {
            'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elsei, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Else, allocator),
        },
        TokenizerState.Elsei => switch(ch)
        {
            'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Elseif, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Elseif => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Elseif, allocator),
        TokenizerState.O => switch(ch)
        {
            'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Or, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Or => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Or, allocator),
        TokenizerState.D => switch(ch)
        {
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Do, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Do => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Do, allocator),
        TokenizerState.I => switch(ch)
        {
            'f' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.If, tokenStr, ch),
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.In, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.In => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.In, allocator),
        TokenizerState.If => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.If, allocator),
        TokenizerState.F => switch(ch)
        {
            'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fa, tokenStr, ch),
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fo, tokenStr, ch),
            'u' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fu, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Fu => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fun, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Fun => switch(ch)
        {
            'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Func, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Func => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Funct, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Funct => switch(ch)
        {
            'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functi, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Functi => switch(ch)
        {
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Functio, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Functio => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Function, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Function => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Function, allocator),
        TokenizerState.Fa => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fal, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Fal => switch(ch)
        {
            's' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Fals, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Fals => switch(ch)
        {
            'e' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.False, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.False => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.False, allocator),
        TokenizerState.Fo => switch(ch)
        {
            'r' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.For, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.For => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.For, allocator),
        TokenizerState.L => switch(ch)
        {
            'o' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Lo, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Lo => switch(ch)
        {
            'c' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loc, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Loc => switch(ch)
        {
            'a' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Loca, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Loca => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Local, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Local => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Local, allocator),
        TokenizerState.U => switch(ch)
        {
            'n' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Un, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Un => switch(ch)
        {
            't' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unt, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Unt => switch(ch)
        {
            'i' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Unti, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Unti => switch(ch)
        {
            'l' => try tokenizeTerminalNoToken(lastIndex, index.*, state, TokenizerState.Until, tokenStr, ch),
            else => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Name, allocator),
        },
        TokenizerState.Until => try tokenizeAlphanumericNonstart(lastIndex, index, tokens, tokenType, tokenStr, tokenNumeral, state, ch, TokenType.Until, allocator),
        else =>
        {
            std.debug.print("{}\n", .{state.*});
            return error.NotImplemented;
        }
    }
}

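// A minimal sketch (hypothetical test, not part of this commit) of the \z
// escape handling referenced above: the whitespace span after \z, including
// the line break, is dropped from the resulting string literal.
test "backslash z skips whitespace"
{
    const allocator = std.testing.allocator;
    const source = try allocator.dupe(u8, "'a\\z  \n b' ");
    defer allocator.free(source);
    const tokens = try tokenize(source, allocator);
    defer
    {
        for(tokens) |token|
        {
            switch(token.tokenData)
            {
                .string => |data| allocator.free(data),
                else => {}
            }
        }
        allocator.free(tokens);
    }
    try std.testing.expectEqual(@as(usize, 1), tokens.len);
    try std.testing.expectEqualStrings("ab", tokens[0].tokenData.string);
}
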
// Runs the DFA over file_content and returns the tokens as an owned slice;
// Name and StringLiteral tokens own allocator-allocated copies of their text.
pub fn tokenize(file_content: []u8, allocator: std.mem.Allocator) ![]Token
{
    var tokens = std.ArrayList(Token).init(allocator);
    var state: TokenizerState = TokenizerState.Start;
    var lastIndex: ?usize = null;
    var index: usize = 0;
    var tokenType: ?TokenType = null;
    var tokenStr = std.ArrayList(u8).init(allocator);
    defer tokenStr.deinit();
    var tokenNumeral: ?types.Numeral = null;
    var longBracketLevel: u32 = 0;

    while(index < file_content.len)
    {
        const ch = file_content[index];
        try tokenizeChar(&state, ch, &lastIndex, &index, &tokenType, &tokenStr, &tokenNumeral, &tokens, &longBracketLevel, allocator);
        index += 1;
    }
    return tokens.toOwnedSlice();
}
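// A minimal sketch (hypothetical test, not part of this commit) showing the
// keyword/name split, backtracking, and numeral handling end to end.
test "tokenize a local declaration"
{
    const allocator = std.testing.allocator;
    const source = try allocator.dupe(u8, "local x = 1\n");
    defer allocator.free(source);
    const tokens = try tokenize(source, allocator);
    defer
    {
        for(tokens) |token|
        {
            switch(token.tokenData)
            {
                .string => |data| allocator.free(data),
                else => {}
            }
        }
        allocator.free(tokens);
    }
    try std.testing.expectEqual(@as(usize, 4), tokens.len);
    try std.testing.expectEqual(TokenType.Local, tokens[0].tokenType);
    try std.testing.expectEqual(TokenType.Name, tokens[1].tokenType);
    try std.testing.expectEqual(TokenType.Equals, tokens[2].tokenType);
    try std.testing.expectEqual(TokenType.Numeral, tokens[3].tokenType);
}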
src/types.zig (new file)
@@ -0,0 +1,5 @@
pub const Numeral = union(enum)
{
    Integer: i64,
    Float: f64,
};
test/test.lua
Normal file
1
test/test.lua
Normal file
@ -0,0 +1 @@
|
||||
local t=(string.find(originalField.af,'m') and originalField.tableAction) or c.tableAction or originalField.tableAction or tableActionGeneric
|