From e53734dccad6434ac82db0e6daa1a23f24dc2b14 Mon Sep 17 00:00:00 2001 From: 0x4261756D <38735823+0x4261756D@users.noreply.github.com> Date: Wed, 12 Jul 2023 23:25:20 +0200 Subject: [PATCH] Add terribly slow cyk parser --- src/grammar.rs | 698 ++++++++++++++++++++++++----------------------- src/main.rs | 1 + src/parser.rs | 347 ++++++----------------- src/tokenizer.rs | 17 +- 4 files changed, 445 insertions(+), 618 deletions(-) diff --git a/src/grammar.rs b/src/grammar.rs index d6a48eb..f66258b 100644 --- a/src/grammar.rs +++ b/src/grammar.rs @@ -6,7 +6,7 @@ pub enum Rule NonTerminal(u8, u8, u8) } -pub const NONTERMS: [&str; 115] = +pub const NONTERMINAL_NAMES: [&str; 115] = [ "stat__15", "funcbody__50", @@ -125,350 +125,356 @@ pub const NONTERMS: [&str; 115] = "retstat__47", ]; -pub const GRAMMAR: [Rule; 344] = +pub const TERMINAL_RULES: [(u8, Token); 125] = [ - Rule::Terminal(57, Token::RoundOpen), - Rule::Terminal(34, Token::RoundClosed), - Rule::Terminal(8, Token::Comma), - Rule::Terminal(66, Token::DotDotDot), - Rule::Terminal(69, Token::Dot), - Rule::Terminal(39, Token::ColonColon), - Rule::Terminal(99, Token::Colon), - Rule::Terminal(15, Token::Semicolon), - Rule::Terminal(20, Token::Lt), - Rule::Terminal(98, Token::Equals), - Rule::Terminal(6, Token::Gt), - Rule::Terminal(81, Token::Name(String::new())), - Rule::NonTerminal(80, 22, 29), - Rule::NonTerminal(80, 68, 114), - Rule::Terminal(80, Token::Return), - Rule::NonTerminal(80, 40, 22), - Rule::Terminal(80, Token::Semicolon), - Rule::Terminal(80, Token::Break), - Rule::NonTerminal(80, 82, 81), - Rule::NonTerminal(80, 79, 44), - Rule::NonTerminal(80, 47, 25), - Rule::NonTerminal(80, 63, 46), - Rule::NonTerminal(80, 104, 59), - Rule::NonTerminal(80, 85, 30), - Rule::NonTerminal(80, 75, 89), - Rule::NonTerminal(80, 33, 0), - Rule::NonTerminal(80, 58, 93), - Rule::NonTerminal(80, 85, 31), - Rule::NonTerminal(80, 33, 11), - Rule::NonTerminal(80, 71, 87), - Rule::NonTerminal(80, 71, 73), - Rule::NonTerminal(80, 39, 51), - Rule::Terminal(43, Token::SquareOpen), - Rule::Terminal(42, Token::SquareClosed), - Rule::Terminal(87, Token::StringLiteral(String::new())), - Rule::NonTerminal(87, 57, 105), - Rule::NonTerminal(87, 23, 78), - Rule::NonTerminal(105, 21, 34), - Rule::Terminal(105, Token::RoundClosed), - Rule::NonTerminal(44, 98, 21), - Rule::NonTerminal(72, 81, 95), - Rule::Terminal(72, Token::Name(String::new())), - Rule::NonTerminal(95, 103, 27), - Rule::NonTerminal(95, 20, 60), - Rule::NonTerminal(95, 112, 27), - Rule::NonTerminal(95, 8, 36), - Rule::NonTerminal(103, 20, 60), - Rule::NonTerminal(60, 81, 6), - Rule::Terminal(65, Token::Plus), - Rule::Terminal(65, Token::Minus), - Rule::Terminal(65, Token::Star), - Rule::Terminal(65, Token::Slash), - Rule::Terminal(65, Token::SlashSlash), - Rule::Terminal(65, Token::Caret), - Rule::Terminal(65, Token::Percent), - Rule::Terminal(65, Token::Ampersand), - Rule::Terminal(65, Token::Pipe), - Rule::Terminal(65, Token::GtGt), - Rule::Terminal(65, Token::LtLt), - Rule::Terminal(65, Token::DotDot), - Rule::Terminal(65, Token::Lt), - Rule::Terminal(65, Token::LtEquals), - Rule::Terminal(65, Token::Gt), - Rule::Terminal(65, Token::GtEquals), - Rule::Terminal(65, Token::EqualsEquals), - Rule::Terminal(65, Token::TildeEquals), - Rule::Terminal(65, Token::And), - Rule::Terminal(65, Token::Or), - Rule::NonTerminal(7, 22, 29), - Rule::NonTerminal(7, 68, 114), - Rule::Terminal(7, Token::Return), - Rule::NonTerminal(7, 40, 22), - Rule::Terminal(7, Token::Semicolon), - Rule::Terminal(7, Token::Break), - Rule::NonTerminal(7, 82, 81), - Rule::NonTerminal(7, 79, 44), - Rule::NonTerminal(7, 47, 25), - Rule::NonTerminal(7, 63, 46), - Rule::NonTerminal(7, 104, 59), - Rule::NonTerminal(7, 85, 30), - Rule::NonTerminal(7, 75, 89), - Rule::NonTerminal(7, 33, 0), - Rule::NonTerminal(7, 58, 93), - Rule::NonTerminal(7, 85, 31), - Rule::NonTerminal(7, 33, 11), - Rule::NonTerminal(7, 71, 87), - Rule::NonTerminal(7, 71, 73), - Rule::NonTerminal(7, 39, 51), - Rule::Terminal(47, Token::Do), - Rule::Terminal(18, Token::Else), - Rule::NonTerminal(50, 18, 7), - Rule::Terminal(50, Token::Else), - Rule::Terminal(84, Token::Elseif), - Rule::NonTerminal(74, 84, 61), - Rule::NonTerminal(64, 74, 64), - Rule::NonTerminal(64, 84, 61), - Rule::NonTerminal(61, 55, 49), - Rule::NonTerminal(49, 67, 7), - Rule::Terminal(49, Token::Then), - Rule::Terminal(94, Token::End), - Rule::Terminal(55, Token::Nil), - Rule::Terminal(55, Token::False), - Rule::Terminal(55, Token::True), - Rule::Terminal(55, Token::Numeral(String::new())), - Rule::Terminal(55, Token::StringLiteral(String::new())), - Rule::Terminal(55, Token::DotDotDot), - Rule::NonTerminal(55, 48, 55), - Rule::NonTerminal(55, 55, 109), - Rule::NonTerminal(55, 75, 113), - Rule::NonTerminal(55, 23, 78), - Rule::NonTerminal(55, 57, 14), - Rule::Terminal(55, Token::Name(String::new())), - Rule::NonTerminal(55, 71, 77), - Rule::NonTerminal(55, 71, 26), - Rule::NonTerminal(55, 71, 87), - Rule::NonTerminal(55, 71, 73), - Rule::NonTerminal(109, 65, 55), - Rule::NonTerminal(21, 55, 96), - Rule::Terminal(21, Token::Nil), - Rule::Terminal(21, Token::False), - Rule::Terminal(21, Token::True), - Rule::Terminal(21, Token::Numeral(String::new())), - Rule::Terminal(21, Token::StringLiteral(String::new())), - Rule::Terminal(21, Token::DotDotDot), - Rule::NonTerminal(21, 48, 55), - Rule::NonTerminal(21, 55, 109), - Rule::NonTerminal(21, 75, 113), - Rule::NonTerminal(21, 23, 78), - Rule::NonTerminal(21, 57, 14), - Rule::Terminal(21, Token::Name(String::new())), - Rule::NonTerminal(21, 71, 77), - Rule::NonTerminal(21, 71, 26), - Rule::NonTerminal(21, 71, 87), - Rule::NonTerminal(21, 71, 73), - Rule::NonTerminal(32, 43, 28), - Rule::NonTerminal(32, 81, 45), - Rule::Terminal(32, Token::Nil), - Rule::Terminal(32, Token::False), - Rule::Terminal(32, Token::True), - Rule::Terminal(32, Token::Numeral(String::new())), - Rule::Terminal(32, Token::StringLiteral(String::new())), - Rule::Terminal(32, Token::DotDotDot), - Rule::NonTerminal(32, 48, 55), - Rule::NonTerminal(32, 55, 109), - Rule::NonTerminal(32, 75, 113), - Rule::NonTerminal(32, 23, 78), - Rule::NonTerminal(32, 57, 14), - Rule::Terminal(32, Token::Name(String::new())), - Rule::NonTerminal(32, 71, 77), - Rule::NonTerminal(32, 71, 26), - Rule::NonTerminal(32, 71, 87), - Rule::NonTerminal(32, 71, 73), - Rule::NonTerminal(28, 55, 9), - Rule::NonTerminal(9, 42, 45), - Rule::NonTerminal(45, 98, 55), - Rule::NonTerminal(2, 32, 3), - Rule::NonTerminal(2, 43, 28), - Rule::NonTerminal(2, 81, 45), - Rule::Terminal(2, Token::Nil), - Rule::Terminal(2, Token::False), - Rule::Terminal(2, Token::True), - Rule::Terminal(2, Token::Numeral(String::new())), - Rule::Terminal(2, Token::StringLiteral(String::new())), - Rule::Terminal(2, Token::DotDotDot), - Rule::NonTerminal(2, 48, 55), - Rule::NonTerminal(2, 55, 109), - Rule::NonTerminal(2, 75, 113), - Rule::NonTerminal(2, 23, 78), - Rule::NonTerminal(2, 57, 14), - Rule::Terminal(2, Token::Name(String::new())), - Rule::NonTerminal(2, 71, 77), - Rule::NonTerminal(2, 71, 26), - Rule::NonTerminal(2, 71, 87), - Rule::NonTerminal(2, 71, 73), - Rule::NonTerminal(3, 41, 62), - Rule::Terminal(3, Token::Comma), - Rule::Terminal(3, Token::Semicolon), - Rule::NonTerminal(3, 111, 41), - Rule::NonTerminal(3, 62, 32), - Rule::Terminal(62, Token::Comma), - Rule::Terminal(62, Token::Semicolon), - Rule::Terminal(85, Token::For), - Rule::NonTerminal(38, 8, 55), - Rule::NonTerminal(113, 57, 1), - Rule::NonTerminal(1, 5, 52), - Rule::NonTerminal(1, 34, 25), - Rule::NonTerminal(52, 34, 25), - Rule::NonTerminal(13, 81, 102), - Rule::Terminal(13, Token::Name(String::new())), - Rule::NonTerminal(102, 24, 56), - Rule::NonTerminal(102, 26, 24), - Rule::NonTerminal(102, 69, 81), - Rule::NonTerminal(102, 99, 81), - Rule::NonTerminal(56, 99, 81), - Rule::NonTerminal(26, 69, 81), - Rule::NonTerminal(24, 26, 24), - Rule::NonTerminal(24, 69, 81), - Rule::Terminal(75, Token::Function), - Rule::NonTerminal(73, 99, 101), - Rule::NonTerminal(101, 81, 87), - Rule::Terminal(82, Token::Goto), - Rule::Terminal(58, Token::If), - Rule::Terminal(53, Token::In), - Rule::NonTerminal(51, 81, 39), - Rule::Terminal(33, Token::Local), - Rule::NonTerminal(112, 8, 36), - Rule::NonTerminal(27, 112, 27), - Rule::NonTerminal(27, 8, 36), - Rule::NonTerminal(36, 81, 103), - Rule::Terminal(36, Token::Name(String::new())), - Rule::NonTerminal(96, 38, 96), - Rule::NonTerminal(96, 8, 55), - Rule::NonTerminal(111, 62, 32), - Rule::NonTerminal(41, 111, 41), - Rule::NonTerminal(41, 62, 32), - Rule::NonTerminal(91, 37, 91), - Rule::NonTerminal(91, 8, 81), - Rule::NonTerminal(37, 8, 81), - Rule::NonTerminal(35, 37, 35), - Rule::NonTerminal(35, 8, 81), - Rule::NonTerminal(4, 81, 91), - Rule::Terminal(4, Token::Name(String::new())), - Rule::Terminal(5, Token::DotDotDot), - Rule::NonTerminal(5, 4, 83), - Rule::NonTerminal(5, 81, 91), - Rule::Terminal(5, Token::Name(String::new())), - Rule::NonTerminal(83, 8, 66), - Rule::NonTerminal(71, 57, 14), - Rule::Terminal(71, Token::Name(String::new())), - Rule::NonTerminal(71, 71, 77), - Rule::NonTerminal(71, 71, 26), - Rule::NonTerminal(71, 71, 87), - Rule::NonTerminal(71, 71, 73), - Rule::NonTerminal(14, 55, 34), - Rule::Terminal(104, Token::Repeat), - Rule::NonTerminal(29, 68, 114), - Rule::Terminal(29, Token::Return), - Rule::NonTerminal(114, 21, 15), - Rule::Terminal(114, Token::Semicolon), - Rule::NonTerminal(114, 55, 96), - Rule::Terminal(114, Token::Nil), - Rule::Terminal(114, Token::False), - Rule::Terminal(114, Token::True), - Rule::Terminal(114, Token::Numeral(String::new())), - Rule::Terminal(114, Token::StringLiteral(String::new())), - Rule::Terminal(114, Token::DotDotDot), - Rule::NonTerminal(114, 48, 55), - Rule::NonTerminal(114, 55, 109), - Rule::NonTerminal(114, 75, 113), - Rule::NonTerminal(114, 23, 78), - Rule::NonTerminal(114, 57, 14), - Rule::Terminal(114, Token::Name(String::new())), - Rule::NonTerminal(114, 71, 77), - Rule::NonTerminal(114, 71, 26), - Rule::NonTerminal(114, 71, 87), - Rule::NonTerminal(114, 71, 73), - Rule::Terminal(68, Token::Return), - Rule::Terminal(40, Token::Semicolon), - Rule::Terminal(40, Token::Break), - Rule::NonTerminal(40, 82, 81), - Rule::NonTerminal(40, 79, 44), - Rule::NonTerminal(40, 47, 25), - Rule::NonTerminal(40, 63, 46), - Rule::NonTerminal(40, 104, 59), - Rule::NonTerminal(40, 85, 30), - Rule::NonTerminal(40, 75, 89), - Rule::NonTerminal(40, 33, 0), - Rule::NonTerminal(40, 58, 93), - Rule::NonTerminal(40, 85, 31), - Rule::NonTerminal(40, 33, 11), - Rule::NonTerminal(40, 71, 87), - Rule::NonTerminal(40, 71, 73), - Rule::NonTerminal(40, 39, 51), - Rule::NonTerminal(22, 40, 22), - Rule::Terminal(22, Token::Semicolon), - Rule::Terminal(22, Token::Break), - Rule::NonTerminal(22, 82, 81), - Rule::NonTerminal(22, 79, 44), - Rule::NonTerminal(22, 47, 25), - Rule::NonTerminal(22, 63, 46), - Rule::NonTerminal(22, 104, 59), - Rule::NonTerminal(22, 85, 30), - Rule::NonTerminal(22, 75, 89), - Rule::NonTerminal(22, 33, 0), - Rule::NonTerminal(22, 58, 93), - Rule::NonTerminal(22, 85, 31), - Rule::NonTerminal(22, 33, 11), - Rule::NonTerminal(22, 71, 87), - Rule::NonTerminal(22, 71, 73), - Rule::NonTerminal(22, 39, 51), - Rule::NonTerminal(97, 53, 10), - Rule::NonTerminal(10, 21, 16), - Rule::NonTerminal(89, 13, 113), - Rule::NonTerminal(0, 75, 19), - Rule::NonTerminal(19, 81, 113), - Rule::NonTerminal(93, 55, 12), - Rule::NonTerminal(12, 67, 54), - Rule::NonTerminal(54, 7, 70), - Rule::NonTerminal(54, 64, 100), - Rule::NonTerminal(54, 50, 94), - Rule::Terminal(54, Token::End), - Rule::NonTerminal(70, 64, 100), - Rule::NonTerminal(70, 50, 94), - Rule::Terminal(70, Token::End), - Rule::NonTerminal(100, 50, 94), - Rule::Terminal(100, Token::End), - Rule::NonTerminal(31, 81, 17), - Rule::NonTerminal(17, 98, 92), - Rule::NonTerminal(92, 55, 108), - Rule::NonTerminal(46, 55, 16), - Rule::NonTerminal(108, 8, 88), - Rule::NonTerminal(88, 55, 110), - Rule::NonTerminal(110, 38, 16), - Rule::NonTerminal(110, 47, 25), - Rule::NonTerminal(11, 72, 44), - Rule::NonTerminal(11, 81, 95), - Rule::Terminal(11, Token::Name(String::new())), - Rule::NonTerminal(16, 47, 25), - Rule::NonTerminal(25, 7, 94), - Rule::Terminal(25, Token::End), - Rule::NonTerminal(59, 7, 86), - Rule::NonTerminal(59, 90, 55), - Rule::NonTerminal(86, 90, 55), - Rule::NonTerminal(30, 4, 97), - Rule::NonTerminal(78, 2, 76), - Rule::Terminal(78, Token::CurlyClosed), - Rule::Terminal(67, Token::Then), - Rule::Terminal(48, Token::Minus), - Rule::Terminal(48, Token::Not), - Rule::Terminal(48, Token::Hash), - Rule::Terminal(48, Token::Tilde), - Rule::Terminal(90, Token::Until), - Rule::Terminal(106, Token::Name(String::new())), - Rule::NonTerminal(106, 71, 77), - Rule::NonTerminal(106, 71, 26), - Rule::NonTerminal(77, 43, 107), - Rule::NonTerminal(107, 55, 42), - Rule::NonTerminal(79, 106, 35), - Rule::Terminal(79, Token::Name(String::new())), - Rule::NonTerminal(79, 71, 77), - Rule::NonTerminal(79, 71, 26), - Rule::Terminal(63, Token::While), - Rule::Terminal(23, Token::CurlyOpen), - Rule::Terminal(76, Token::CurlyClosed), + (57, Token::RoundOpen), + (34, Token::RoundClosed), + (8, Token::Comma), + (66, Token::DotDotDot), + (69, Token::Dot), + (39, Token::ColonColon), + (99, Token::Colon), + (15, Token::Semicolon), + (20, Token::Lt), + (98, Token::Equals), + (6, Token::Gt), + (81, Token::Name(String::new())), + (80, Token::Return), + (80, Token::Semicolon), + (80, Token::Break), + (43, Token::SquareOpen), + (42, Token::SquareClosed), + (87, Token::StringLiteral(String::new())), + (105, Token::RoundClosed), + (72, Token::Name(String::new())), + (65, Token::Plus), + (65, Token::Minus), + (65, Token::Star), + (65, Token::Slash), + (65, Token::SlashSlash), + (65, Token::Caret), + (65, Token::Percent), + (65, Token::Ampersand), + (65, Token::Pipe), + (65, Token::GtGt), + (65, Token::LtLt), + (65, Token::DotDot), + (65, Token::Lt), + (65, Token::LtEquals), + (65, Token::Gt), + (65, Token::GtEquals), + (65, Token::EqualsEquals), + (65, Token::TildeEquals), + (65, Token::And), + (65, Token::Or), + (7, Token::Return), + (7, Token::Semicolon), + (7, Token::Break), + (47, Token::Do), + (18, Token::Else), + (50, Token::Else), + (84, Token::Elseif), + (49, Token::Then), + (94, Token::End), + (55, Token::Nil), + (55, Token::False), + (55, Token::True), + (55, Token::Numeral(String::new())), + (55, Token::StringLiteral(String::new())), + (55, Token::DotDotDot), + (55, Token::Name(String::new())), + (21, Token::Nil), + (21, Token::False), + (21, Token::True), + (21, Token::Numeral(String::new())), + (21, Token::StringLiteral(String::new())), + (21, Token::DotDotDot), + (21, Token::Name(String::new())), + (32, Token::Nil), + (32, Token::False), + (32, Token::True), + (32, Token::Numeral(String::new())), + (32, Token::StringLiteral(String::new())), + (32, Token::DotDotDot), + (32, Token::Name(String::new())), + (2, Token::Nil), + (2, Token::False), + (2, Token::True), + (2, Token::Numeral(String::new())), + (2, Token::StringLiteral(String::new())), + (2, Token::DotDotDot), + (2, Token::Name(String::new())), + (3, Token::Comma), + (3, Token::Semicolon), + (62, Token::Comma), + (62, Token::Semicolon), + (85, Token::For), + (13, Token::Name(String::new())), + (75, Token::Function), + (82, Token::Goto), + (58, Token::If), + (53, Token::In), + (33, Token::Local), + (36, Token::Name(String::new())), + (4, Token::Name(String::new())), + (5, Token::DotDotDot), + (5, Token::Name(String::new())), + (71, Token::Name(String::new())), + (104, Token::Repeat), + (29, Token::Return), + (114, Token::Semicolon), + (114, Token::Nil), + (114, Token::False), + (114, Token::True), + (114, Token::Numeral(String::new())), + (114, Token::StringLiteral(String::new())), + (114, Token::DotDotDot), + (114, Token::Name(String::new())), + (68, Token::Return), + (40, Token::Semicolon), + (40, Token::Break), + (22, Token::Semicolon), + (22, Token::Break), + (54, Token::End), + (70, Token::End), + (100, Token::End), + (11, Token::Name(String::new())), + (25, Token::End), + (78, Token::CurlyClosed), + (67, Token::Then), + (48, Token::Minus), + (48, Token::Not), + (48, Token::Hash), + (48, Token::Tilde), + (90, Token::Until), + (106, Token::Name(String::new())), + (79, Token::Name(String::new())), + (63, Token::While), + (23, Token::CurlyOpen), + (76, Token::CurlyClosed), + ]; + +pub const NONTERMINAL_RULES: [(u8, u8, u8); 219] = +[ + (80, 22, 29), + (80, 68, 114), + (80, 40, 22), + (80, 82, 81), + (80, 79, 44), + (80, 47, 25), + (80, 63, 46), + (80, 104, 59), + (80, 85, 30), + (80, 75, 89), + (80, 33, 0), + (80, 58, 93), + (80, 85, 31), + (80, 33, 11), + (80, 71, 87), + (80, 71, 73), + (80, 39, 51), + (87, 57, 105), + (87, 23, 78), + (105, 21, 34), + (44, 98, 21), + (72, 81, 95), + (95, 103, 27), + (95, 20, 60), + (95, 112, 27), + (95, 8, 36), + (103, 20, 60), + (60, 81, 6), + (7, 22, 29), + (7, 68, 114), + (7, 40, 22), + (7, 82, 81), + (7, 79, 44), + (7, 47, 25), + (7, 63, 46), + (7, 104, 59), + (7, 85, 30), + (7, 75, 89), + (7, 33, 0), + (7, 58, 93), + (7, 85, 31), + (7, 33, 11), + (7, 71, 87), + (7, 71, 73), + (7, 39, 51), + (50, 18, 7), + (74, 84, 61), + (64, 74, 64), + (64, 84, 61), + (61, 55, 49), + (49, 67, 7), + (55, 48, 55), + (55, 55, 109), + (55, 75, 113), + (55, 23, 78), + (55, 57, 14), + (55, 71, 77), + (55, 71, 26), + (55, 71, 87), + (55, 71, 73), + (109, 65, 55), + (21, 55, 96), + (21, 48, 55), + (21, 55, 109), + (21, 75, 113), + (21, 23, 78), + (21, 57, 14), + (21, 71, 77), + (21, 71, 26), + (21, 71, 87), + (21, 71, 73), + (32, 43, 28), + (32, 81, 45), + (32, 48, 55), + (32, 55, 109), + (32, 75, 113), + (32, 23, 78), + (32, 57, 14), + (32, 71, 77), + (32, 71, 26), + (32, 71, 87), + (32, 71, 73), + (28, 55, 9), + (9, 42, 45), + (45, 98, 55), + (2, 32, 3), + (2, 43, 28), + (2, 81, 45), + (2, 48, 55), + (2, 55, 109), + (2, 75, 113), + (2, 23, 78), + (2, 57, 14), + (2, 71, 77), + (2, 71, 26), + (2, 71, 87), + (2, 71, 73), + (3, 41, 62), + (3, 111, 41), + (3, 62, 32), + (38, 8, 55), + (113, 57, 1), + (1, 5, 52), + (1, 34, 25), + (52, 34, 25), + (13, 81, 102), + (102, 24, 56), + (102, 26, 24), + (102, 69, 81), + (102, 99, 81), + (56, 99, 81), + (26, 69, 81), + (24, 26, 24), + (24, 69, 81), + (73, 99, 101), + (101, 81, 87), + (51, 81, 39), + (112, 8, 36), + (27, 112, 27), + (27, 8, 36), + (36, 81, 103), + (96, 38, 96), + (96, 8, 55), + (111, 62, 32), + (41, 111, 41), + (41, 62, 32), + (91, 37, 91), + (91, 8, 81), + (37, 8, 81), + (35, 37, 35), + (35, 8, 81), + (4, 81, 91), + (5, 4, 83), + (5, 81, 91), + (83, 8, 66), + (71, 57, 14), + (71, 71, 77), + (71, 71, 26), + (71, 71, 87), + (71, 71, 73), + (14, 55, 34), + (29, 68, 114), + (114, 21, 15), + (114, 55, 96), + (114, 48, 55), + (114, 55, 109), + (114, 75, 113), + (114, 23, 78), + (114, 57, 14), + (114, 71, 77), + (114, 71, 26), + (114, 71, 87), + (114, 71, 73), + (40, 82, 81), + (40, 79, 44), + (40, 47, 25), + (40, 63, 46), + (40, 104, 59), + (40, 85, 30), + (40, 75, 89), + (40, 33, 0), + (40, 58, 93), + (40, 85, 31), + (40, 33, 11), + (40, 71, 87), + (40, 71, 73), + (40, 39, 51), + (22, 40, 22), + (22, 82, 81), + (22, 79, 44), + (22, 47, 25), + (22, 63, 46), + (22, 104, 59), + (22, 85, 30), + (22, 75, 89), + (22, 33, 0), + (22, 58, 93), + (22, 85, 31), + (22, 33, 11), + (22, 71, 87), + (22, 71, 73), + (22, 39, 51), + (97, 53, 10), + (10, 21, 16), + (89, 13, 113), + (0, 75, 19), + (19, 81, 113), + (93, 55, 12), + (12, 67, 54), + (54, 7, 70), + (54, 64, 100), + (54, 50, 94), + (70, 64, 100), + (70, 50, 94), + (100, 50, 94), + (31, 81, 17), + (17, 98, 92), + (92, 55, 108), + (46, 55, 16), + (108, 8, 88), + (88, 55, 110), + (110, 38, 16), + (110, 47, 25), + (11, 72, 44), + (11, 81, 95), + (16, 47, 25), + (25, 7, 94), + (59, 7, 86), + (59, 90, 55), + (86, 90, 55), + (30, 4, 97), + (78, 2, 76), + (106, 71, 77), + (106, 71, 26), + (77, 43, 107), + (107, 55, 42), + (79, 106, 35), + (79, 71, 77), + (79, 71, 26), + +]; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 56a6351..1719c96 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ pub mod tokenizer; pub mod parser; +pub mod grammar; use std::{env, fs}; diff --git a/src/parser.rs b/src/parser.rs index 73ddf39..734af39 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,290 +1,111 @@ -use std::slice::Iter; +use std::{slice::Iter, iter::{Peekable, Map}, collections::HashMap}; use crate::tokenizer::Token; -#[derive(Debug)] -pub struct BlockNode -{ - stats: Vec, - retstat: Option -} -#[derive(Debug)] -pub enum StatNode -{ - Semicolon, - Assignment(AssignmentNode), - FunctionCall(FunctionCallNode), - Label(LabelNode), - Break, - Goto(GotoNode), - Block(BlockNode), - While(WhileNode), - Repeat(RepeatNode), - If(IfNode), - For(ForNode), - ForIn(ForInNode), - Function(FunctionNode), - LocalFunction(LocalFunctionNode), - Local(LocalNode), -} -#[derive(Debug)] -pub struct AssignmentNode -{ +use crate::grammar::{NONTERMINAL_NAMES, Rule, NONTERMINAL_RULES, TERMINAL_RULES}; -} -#[derive(Debug)] -pub struct FunctionCallNode -{ - -} -#[derive(Debug)] -pub struct RetstatNode -{ - -} -#[derive(Debug)] -pub struct LabelNode -{ - -} -#[derive(Debug)] -pub struct GotoNode -{ - label: String -} -#[derive(Debug)] -pub struct WhileNode -{ - condition: ExpNode, - block: BlockNode, -} -#[derive(Debug)] -pub struct RepeatNode -{ - condition: ExpNode, - block: BlockNode, -} -#[derive(Debug)] -pub struct IfNode -{ - condition: ExpNode, - then_block: BlockNode, - elseifs: Option> -} -#[derive(Debug)] -pub struct ElseIfNode -{ - condition: ExpNode, - block: BlockNode, -} -#[derive(Debug)] -pub struct ForNode -{ - -} -#[derive(Debug)] -pub struct ForInNode -{ - -} -#[derive(Debug)] -pub struct FunctionNode -{ - -} -#[derive(Debug)] -pub struct LocalFunctionNode -{ - -} -#[derive(Debug)] -pub struct LocalNode -{ - -} -#[derive(Debug)] -pub struct ExpNode +#[derive(Debug, Clone, Copy)] +pub struct Node { } -pub fn parse(tokens: Vec) -> Result +#[derive(Debug, Clone, Copy)] +pub struct AmbiguousNode { - let mut iter = tokens.iter(); - return parse_block(&mut iter); + } -fn parse_block(tokens: &mut Iter<'_, Token>) -> Result +pub fn parse(tokens: Vec) -> Result { - let mut block_node = BlockNode { retstat: None, stats: Vec::new() }; - while let Some(stat) = parse_stat(tokens)? - { - block_node.stats.push(stat); + return cyk(tokens); +} + +pub fn cyk(tokens: Vec) -> Result +{ + let r = NONTERMINAL_NAMES.len(); + let n = tokens.len(); + macro_rules! index { + ($x:expr, $y:expr, $z:expr) => { + ($x + $y * n + ($z as usize) * n * n) + }; } - block_node.retstat = parse_retstat(tokens)?; - return Ok(block_node); -} - -fn parse_stat(tokens: &mut Iter<'_, Token>) -> Result, &'static str> -{ - match tokens.next() + let mut p = vec![false; n * n * r]; + //let mut back: Vec> = vec![Vec::new(); n * n * r]; + println!("{n}, {r}, {}", p.len()); + for s in 0..n { - Some(token) => + for (index, token) in TERMINAL_RULES { - match token + if let Token::Name(_) = tokens[s] { - Token::Semicolon => return Ok(Some(StatNode::Semicolon)), - Token::Break => return Ok(Some(StatNode::Break)), - Token::Goto => + if let Token::Name(_) = token { - match tokens.next() - { - Some(Token::Name(string)) => return Ok(Some(StatNode::Goto(GotoNode { label: string.clone() }))), - _ => return Err("Failed to parse goto statement") - } - } - Token::Do => + p[index!(0, s, index)] = true + } + } + else if let Token::StringLiteral(_) = tokens[s] + { + if let Token::StringLiteral(_) = token { - let block = parse_block(tokens)?; - match tokens.next() - { - Some(Token::End) => return Ok(Some(StatNode::Block(block))), - _ => return Err("Failed to parse block statement") - } - } - Token::While => + p[index!(0, s, index)] = true + } + } + else if let Token::Numeral(_) = tokens[s] + { + if let Token::Numeral(_) = token { - if let Some(condition) = parse_exp(tokens)? - { - match tokens.next() - { - Some(Token::Do) => - { - let block = parse_block(tokens)?; - match tokens.next() - { - Some(Token::End) => return Ok(Some(StatNode::While(WhileNode { condition, block }))), - _ => Err("No end after while block") - } - } - _ => Err("No do after while condition"), - } - } - else - { - return Err("Failed to parse while condition"); - } - } - Token::Repeat => - { - let block = parse_block(tokens)?; - match tokens.next() - { - Some(Token::Until) => - { - match parse_exp(tokens)? - { - Some(exp) => return Ok(Some(StatNode::Repeat(RepeatNode { condition: exp, block }))), - None => Err("Failed to parse repeat condition") - } - } - _ => Err("Missing until after repeat block") - } - } - Token::If => - { - if let Some(condition) = parse_exp(tokens)? - { - match tokens.next() - { - Some(Token::Then) => - { - let block = parse_block(tokens)?; - let mut elseifs: Vec = Vec::new(); - loop - { - match tokens.next() - { - Some(Token::Elseif) => - { - if let Some(elseif_condition) = parse_exp(tokens)? - { - match tokens.next() - { - Some(Token::Then) => - { - elseifs.push(ElseIfNode { block: parse_block(tokens)?, condition: elseif_condition }); - } - _ => return Err("Missing Then after elseif statement"), - } - } - else - { - return Err("Failed to parse elseif condition"); - } - } - Some(Token::Else) => - { - match tokens.next() - { - Some(Token::End) => return Ok(Some(StatNode::If(IfNode { condition, then_block: block, elseifs: Some(elseifs) }))), - _ => return Err("Missing end after else") - } - } - Some(Token::End) => - { - return Ok(Some(StatNode::If(IfNode { condition, then_block: block, elseifs: None }))); - } - _ => return Err("Missing end after if block") - } - } - } - _ => return Err("Missing then after if condition") - } - } - else - { - return Err("Failed to parse if condition"); - } - } - Token::For => - { - match tokens.next() - { - Some(Token::Name(name)) => - { - match tokens.next() - { - Some(Token::Equals) => - { - todo!(); - } - _ => todo!() - } - } - _ => Err("Missing name after for"), - } - } - _ => todo!() + p[index!(0, s, index)] = true + } + } + else if token == tokens[s] + { + p[index!(0, s, index)] = true } } - None => return Ok(None), } -} + println!("Done initializing"); -fn parse_exp(tokens: &mut Iter<'_, Token>) -> Result, &'static str> -{ - todo!(); -} - -fn parse_retstat(tokens: &mut Iter<'_, Token>) -> Result, &'static str> -{ - match tokens.next() + for l in 2..=n { - Some(Token::Return) => + for s in 1..=(n - l + 1) { - todo!() + for _p in 1..=(l-1) + { + for &(a, b, c) in &NONTERMINAL_RULES + { + if p[index!(_p - 1, s - 1, b)] && p[index!(l - _p - 1, s + _p - 1, c)] + { + let index = index!(l - 1, s - 1, a); + p[index] = true; +/* if !back[index].contains(&(_p, b, c)) + { + back[index].push((_p, b, c)); + }*/ + } + } + } } - None => return Ok(None), - _ => return Err("Found wrong token at the beginning of retstat") + println!("{l}"); } + let start_index = NONTERMINAL_NAMES.iter().position(|x| x == &"S_0").expect("no start index found"); + if p[index!(n - 1, 0, start_index)] + { + println!("Is part of the language"); + todo!() + //return Ok(disambiguate(traverse_back(back, tokens, n, 1, start_index))); + } + else + { + return Err("Input is not part of the language") + } +} + +fn traverse_back(back: Vec>, tokens: Vec, l: usize, s: usize, a: usize) -> AmbiguousNode +{ + todo!() +} + +fn disambiguate(root: AmbiguousNode) -> Node +{ + todo!() } \ No newline at end of file diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 92bd163..8fee3e8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,4 +1,4 @@ -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum Token { Name(String), @@ -11,8 +11,7 @@ pub enum Token EqualsEquals, TildeEquals, LtEquals, GtEquals, Lt, Gt, Equals, RoundOpen, RoundClosed, CurlyOpen, CurlyClosed, SquareOpen, SquareClosed, ColonColon, Semicolon, Colon, Comma, Dot, DotDot, DotDotDot, - IntLiteral(String), - HexLiteral(String), + Numeral(String), StringLiteral(String), } @@ -171,7 +170,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind '/' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Slash), TokenizerState::Slash), ';' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Semicolon), TokenizerState::Semicolon), '^' => tokenize_terminal_no_str(last_index, *index, token, state, Some(Token::Caret), TokenizerState::Caret), - '0' => tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral("0".to_string())), TokenizerState::Zero, token_str, ch), + '0' => tokenize_terminal(last_index, *index, token, state, Some(Token::Numeral("0".to_string())), TokenizerState::Zero, token_str, ch), '"' => { *token = None; @@ -191,7 +190,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind } else if ch.is_numeric() && ch.is_ascii() { - tokenize_terminal(last_index, *index, token, state, Some(Token::IntLiteral(token_str.clone())), TokenizerState::Number, token_str, ch); + tokenize_terminal(last_index, *index, token, state, Some(Token::Numeral(token_str.clone())), TokenizerState::Number, token_str, ch); } else { @@ -422,7 +421,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind { *last_index = *index as i32; token_str.push(ch); - *token = Some(Token::IntLiteral(token_str.clone())); + *token = Some(Token::Numeral(token_str.clone())); } else { @@ -441,7 +440,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind { *last_index = *index as i32; token_str.push(ch); - *token = Some(Token::HexLiteral(token_str.clone())); + *token = Some(Token::Numeral(token_str.clone())); *state = TokenizerState::HexNumber; } else @@ -469,7 +468,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind { *last_index = *index as i32; token_str.push(ch); - *token = Some(Token::HexLiteral(token_str.clone())); + *token = Some(Token::Numeral(token_str.clone())); } else { @@ -494,7 +493,7 @@ fn tokenize_char(state: &mut TokenizerState, ch: char, last_index: &mut i32, ind { *last_index = *index as i32; token_str.push(ch); - *token = Some(Token::IntLiteral(token_str.clone())); + *token = Some(Token::Numeral(token_str.clone())); } else {