Multiple improvements to compaction + ability to codegen for rust
This commit is contained in:
parent
3e7599be7e
commit
9c7ec14baf
3
.gitignore
vendored
3
.gitignore
vendored
@ -30,3 +30,6 @@ metals.sbt
|
||||
.idea
|
||||
.idea_modules
|
||||
/.worksheet/
|
||||
|
||||
# project specific
|
||||
*.rs
|
@ -111,6 +111,7 @@ def grammarToChomsky(grammar: Vector[GrammarRule]): Vector[GrammarRule] =
|
||||
val additional = unit.filter(_.lhs == rule.rhs(0).name).map((a: GrammarRule) => GrammarRule(rule.lhs, a.rhs))
|
||||
unit.addAll(additional)
|
||||
rest = rest.diff(resolvable)
|
||||
if nullable.contains("S_0") then unit.addOne(GrammarRule("S_0", Vector(GrammarRhs("@", true))))
|
||||
return unit.toVector
|
||||
|
||||
val blockRhs = EbnfRhs("block")
|
||||
@ -227,19 +228,36 @@ val baseGrammar = ebnfToGrammar(baseEbnf)
|
||||
|
||||
/** Renders a grammar as text, one rule per line.
  *
  * Each rule's `toString` is preceded by a newline, so a non-empty result starts with "\n";
  * an empty grammar yields the empty string.
  *
  * @param grammar rules to render, in the order they should appear
  * @return the concatenated textual form of all rules
  */
def printGrammar(grammar: Vector[EbnfRule | GrammarRule]): String =
  // mkString appends into a single builder instead of re-copying the accumulated text per rule.
  grammar.iterator.map(rule => "\n" + rule.toString).mkString
|
||||
|
||||
/** Performs one pass of merging nonterminals that have the same set of right-hand sides.
  *
  * Two nonterminals count as duplicates when every right-hand side of one has an
  * element-wise equal right-hand side in the other, and vice versa. All rules of the
  * duplicate are dropped and every non-terminal reference to it is redirected to the
  * nonterminal that was seen first. Each merge is logged to stdout.
  *
  * Nonterminals are visited in the iteration order of a `Set`, so which member of a
  * duplicate pair survives follows that order rather than the order of `grammar`.
  *
  * @param grammar rules to compact
  * @return `Some(compacted)` when at least one rule was removed, `None` when the rule
  *         count is unchanged (callers can loop until `None` to reach a fixpoint)
  */
def removeDuplicates(grammar: Vector[GrammarRule]): Option[Vector[GrammarRule]] =
  val nonterms = grammar.map(_.lhs).toSet
  // Right-hand sides per nonterminal, computed once instead of re-filtering per nonterminal.
  val alternativesOf = grammar.groupMap(_.lhs)(_.rhs)

  // True when both lists hold the same right-hand sides, ignoring order and multiplicity.
  def sameAlternatives(xs: Vector[Vector[GrammarRhs]], ys: Vector[Vector[GrammarRhs]]): Boolean =
    xs.forall(x => ys.exists(_.sameElements(x))) && ys.forall(y => xs.exists(_.sameElements(y)))

  // Nonterminals kept so far, paired with their right-hand sides.
  val kept = ArrayBuffer[(Vector[Vector[GrammarRhs]], String)]()
  var cleanGrammar = grammar
  for nonterm <- nonterms do
    val rightSide = alternativesOf(nonterm)
    kept.find(entry => entry._2 != nonterm && sameAlternatives(rightSide, entry._1)) match
      case None => kept.addOne((rightSide, nonterm))
      case Some((_, survivor)) =>
        println(s"'$nonterm' already exists as '$survivor' with $rightSide")
        // Point every non-terminal occurrence of the duplicate at the surviving name.
        def redirect(symbol: GrammarRhs): GrammarRhs =
          if !symbol.terminal && symbol.name == nonterm then GrammarRhs(survivor) else symbol
        cleanGrammar = cleanGrammar
          .filter(_.lhs != nonterm)
          .map(rule => GrammarRule(rule.lhs, rule.rhs.map(redirect)))
  if cleanGrammar.size == grammar.size then None else Some(cleanGrammar)
|
||||
|
||||
/** Compacts a grammar: merges duplicate nonterminals, then drops rules unreachable from `start`.
  *
  * `removeDuplicates` is applied repeatedly until it reports no change. The surviving rules
  * are then explored breadth-first from the rules of `start`, following non-terminal
  * symbols on right-hand sides. Rule counts and the discarded rules are printed to stdout.
  *
  * @param grammar rules to compact
  * @param start   name of the start nonterminal; it must have at least one rule after merging
  * @return the rules reachable from `start`, in discovery order
  */
def removeDeadRules(grammar: Vector[GrammarRule], start: String): Vector[GrammarRule] =
  println(grammar.size)

  // Re-run duplicate merging until a pass removes nothing, logging the size after each pass.
  def mergeUntilStable(rules: Vector[GrammarRule]): Vector[GrammarRule] =
    removeDuplicates(rules) match
      case None => rules
      case Some(merged) =>
        println(merged.size)
        mergeUntilStable(merged)

  val cleanGrammar = mergeUntilStable(grammar)

  var result: Vector[GrammarRule] = Vector()
  // Left-hand sides already collected; replaces a scan of `result` for every candidate rule.
  var reached = Set.empty[String]
  var current = cleanGrammar.filter(_.lhs == start)
  assert(current.size > 0)
  while current.nonEmpty do
    println(s"${current.size} ${result.size}")
    result = result.concat(current)
    reached = reached ++ current.iterator.map(_.lhs)
    // Nonterminals mentioned on the right-hand sides of the current frontier.
    val referenced = current.iterator.flatMap(_.rhs).filter(!_.terminal).map(_.name).toSet
    current = cleanGrammar.filter(rule => !reached.contains(rule.lhs) && referenced.contains(rule.lhs))
  // NOTE(review): the flattened source does not show whether this diagnostic ran inside the
  // loop; it is emitted once here, after the traversal has finished.
  println(s"Reachability: ${cleanGrammar.diff(result)}")
  println(result.size)
  result
|
||||
|
||||
def CYK(grammar: Vector[GrammarRule], input: Vector[String], start: String): Option[AmbiguousNode] =
|
||||
@ -341,13 +359,103 @@ class AmbiguousNode(val content: String, var precedence: Int, var left: Array[Am
|
||||
return ret
|
||||
end AmbiguousNode
|
||||
|
||||
/** Generates a Rust lookup table for a Chomsky-normal-form grammar.
  *
  * The generated source declares a `Rule` enum, a `NONTERMS` array of nonterminal names and
  * a `GRAMMAR` array with one entry per rule. Rules whose first right-hand-side symbol is
  * `@` are left out. The text is printed to stdout and written to `grammar.rs` in the
  * current working directory.
  *
  * @param grammar rules where every emitted rule has either one terminal or two nonterminals
  *                on its right-hand side (checked with `assert`)
  * @throws Exception when a terminal has no Rust token mapping, or a rule refers to a
  *                   nonterminal that never appears as a left-hand side
  */
def printGrammarAsRustLUT(grammar: Vector[GrammarRule]): Unit =
  // Maps a grammar terminal to the text that follows `Token::` in the generated source.
  def tokenVariant(terminal: String): String = terminal match
    case "Name" => "Name(String::new())"
    case "and" => "And"
    case "break" => "Break"
    case "do" => "Do"
    case "else" => "Else"
    case "elseif" => "Elseif"
    case "end" => "End"
    case "false" => "False"
    case "for" => "For"
    case "function" => "Function"
    case "goto" => "Goto"
    case "if" => "If"
    case "in" => "In"
    case "local" => "Local"
    case "nil" => "Nil"
    case "not" => "Not"
    case "or" => "Or"
    case "repeat" => "Repeat"
    case "return" => "Return"
    case "then" => "Then"
    case "true" => "True"
    case "until" => "Until"
    case "while" => "While"
    case "+" => "Plus"
    case "-" => "Minus"
    case "*" => "Star"
    case "/" => "Slash"
    case "%" => "Percent"
    case "^" => "Caret"
    case "#" => "Hash"
    case "&" => "Ampersand"
    case "~" => "Tilde"
    case "|" => "Pipe"
    case "<<" => "LtLt"
    case ">>" => "GtGt"
    case "//" => "SlashSlash"
    case "==" => "EqualsEquals"
    case "~=" => "TildeEquals"
    case "<=" => "LtEquals"
    case ">=" => "GtEquals"
    case "<" => "Lt"
    case ">" => "Gt"
    case "=" => "Equals"
    case "(" => "RoundOpen"
    case ")" => "RoundClosed"
    case "{" => "CurlyOpen"
    case "}" => "CurlyClosed"
    case "[" => "SquareOpen"
    case "]" => "SquareClosed"
    case "::" => "ColonColon"
    case ";" => "Semicolon"
    case ":" => "Colon"
    case "," => "Comma"
    case "." => "Dot"
    case ".." => "DotDot"
    case "..." => "DotDotDot"
    case "Numeral" => "Numeral(String::new())"
    case "LiteralString" => "StringLiteral(String::new())"
    case _ => throw Exception(s"unknown terminal $terminal")

  val nonterms = grammar.map(_.lhs).toSet.toArray
  val indexByName = nonterms.zipWithIndex.toMap

  // Position of a nonterminal in NONTERMS; fails here instead of emitting -1 into a usize slot.
  def index(name: String): Int =
    indexByName.getOrElse(name, throw Exception(s"unknown nonterminal $name"))

  // Only these rules are written, so the declared array length is derived from them.
  val emitted = grammar.filter(_.rhs(0).name != "@")

  val builder = StringBuilder()
  builder.append("use crate::tokenizer::Token;\n\n")
  builder.append("enum Rule\n{\n\tTerminal(usize, Token),\n\tNonTerminal(usize, usize, usize)\n}\n\n")
  builder.append(s"const NONTERMS: [&str; ${nonterms.length}] =\n[\n")
  for nonterm <- nonterms do
    builder.append(s"\t\"$nonterm\",\n")
  builder.append("];\n\n")
  builder.append(s"const GRAMMAR: [Rule; ${emitted.size}] =\n[\n")
  for rule <- emitted do
    builder.append("\tRule::")
    if rule.rhs.size == 1 then
      assert(rule.rhs(0).terminal)
      builder.append(s"Terminal(${index(rule.lhs)}, Token::")
      builder.append(tokenVariant(rule.rhs(0).name))
      builder.append("),\n")
    else
      assert(rule.rhs.size == 2 && !rule.rhs(0).terminal && !rule.rhs(1).terminal)
      builder.append(s"NonTerminal(${index(rule.lhs)}, ${index(rule.rhs(0).name)}, ${index(rule.rhs(1).name)}),\n")
  builder.append("];\n")

  val output = builder.result()
  println(output)
  val fw = FileWriter(new File("grammar.rs"))
  // Close the writer even when the write fails.
  try fw.write(output)
  finally fw.close()
|
||||
|
||||
// Base grammar converted to Chomsky normal form, ordered by left-hand side.
val chomskyGrammar = grammarToChomsky(baseGrammar).sortBy(x => x.lhs)
// Same grammar after duplicate merging and removal of rules unreachable from "S_0".
val cleanChomskyGrammar = removeDeadRules(chomskyGrammar, "S_0").sortBy(x => x.lhs)

/** Entry point: prints the compacted grammar, parses a sample token sequence with CYK,
  * prints the ambiguous and the disambiguated parse, then generates the Rust lookup table.
  */
@main def main =
  println(printGrammar(cleanChomskyGrammar))
  val sample = Vector("local", "Name", "=", "Numeral", "+", "Numeral", "+", "(", "Numeral", "*", "Numeral", ")")
  // Fail with a descriptive message instead of a bare NoSuchElementException from Option.get.
  val root = CYK(cleanChomskyGrammar, sample, "S_0")
    .getOrElse(throw Exception(s"CYK found no parse for the sample input $sample"))
  println(root)
  println("===============\n\n\n")
  println(disambiguate(root))
  printGrammarAsRustLUT(cleanChomskyGrammar)
|
Loading…
x
Reference in New Issue
Block a user