Multiple improvements to compaction + ability to codegen for Rust
This commit is contained in:
parent
3e7599be7e
commit
9c7ec14baf
3
.gitignore
vendored
3
.gitignore
vendored
@ -30,3 +30,6 @@ metals.sbt
|
|||||||
.idea
|
.idea
|
||||||
.idea_modules
|
.idea_modules
|
||||||
/.worksheet/
|
/.worksheet/
|
||||||
|
|
||||||
|
# project specific
|
||||||
|
*.rs
|
@ -111,6 +111,7 @@ def grammarToChomsky(grammar: Vector[GrammarRule]): Vector[GrammarRule] =
|
|||||||
val additional = unit.filter(_.lhs == rule.rhs(0).name).map((a: GrammarRule) => GrammarRule(rule.lhs, a.rhs))
|
val additional = unit.filter(_.lhs == rule.rhs(0).name).map((a: GrammarRule) => GrammarRule(rule.lhs, a.rhs))
|
||||||
unit.addAll(additional)
|
unit.addAll(additional)
|
||||||
rest = rest.diff(resolvable)
|
rest = rest.diff(resolvable)
|
||||||
|
if nullable.contains("S_0") then unit.addOne(GrammarRule("S_0", Vector(GrammarRhs("@", true))))
|
||||||
return unit.toVector
|
return unit.toVector
|
||||||
|
|
||||||
val blockRhs = EbnfRhs("block")
|
val blockRhs = EbnfRhs("block")
|
||||||
@ -227,19 +228,36 @@ val baseGrammar = ebnfToGrammar(baseEbnf)
|
|||||||
|
|
||||||
// Builds (does not print) a textual dump of the grammar: each rule's toString,
// every one preceded by "\n", so a non-empty grammar yields a string that starts with a newline.
def printGrammar(grammar: Vector[EbnfRule | GrammarRule]) = grammar.fold("")(_.toString() + "\n" + _.toString())
|
def printGrammar(grammar: Vector[EbnfRule | GrammarRule]) = grammar.fold("")(_.toString() + "\n" + _.toString())
|
||||||
|
|
||||||
|
/** Performs one pass of merging nonterminals whose sets of right-hand sides are identical.
  *
  * For every nonterminal the alternatives are collected from the *input* grammar. If an
  * earlier-visited nonterminal has exactly the same alternatives, the later one is dropped:
  * its rules are removed and every nonterminal reference to it is redirected to the earlier one.
  * A merge can make further nonterminals identical, so callers are expected to repeat the
  * pass until it returns `None`.
  *
  * @param grammar the rules to compact
  * @return `Some(compacted)` when at least one nonterminal was merged away, `None` when the
  *         pass changed nothing (detected by comparing rule counts)
  */
def removeDuplicates(grammar: Vector[GrammarRule]): Option[Vector[GrammarRule]] =
  // True when every alternative in `xs` also occurs, element for element, in `ys`.
  def coveredBy(xs: Vector[Vector[GrammarRhs]], ys: Vector[Vector[GrammarRhs]]): Boolean =
    xs.forall(x => ys.exists(_.sameElements(x)))

  val nonterms = grammar.map(_.lhs).toSet
  // (alternatives, name) of every nonterminal kept so far. Each name is visited once,
  // so an entry can never belong to the nonterminal currently being examined.
  val seen: ArrayBuffer[(Vector[Vector[GrammarRhs]], String)] = ArrayBuffer()
  var cleanGrammar = grammar

  // NOTE(review): visiting order is the Set's iteration order, and nothing here protects a
  // start symbol from being the one that gets merged away — confirm this is acceptable for callers.
  for nonterm <- nonterms do
    val rightSide = grammar.filter(_.lhs == nonterm).map(_.rhs)
    seen.find(entry => coveredBy(rightSide, entry._1) && coveredBy(entry._1, rightSide)) match
      case None =>
        seen.addOne((rightSide, nonterm))
      case Some((_, canonical)) =>
        println(s"'$nonterm' already exists as '$canonical' with $rightSide")
        cleanGrammar = cleanGrammar
          .filter(_.lhs != nonterm)
          .map(rule =>
            GrammarRule(
              rule.lhs,
              rule.rhs.map(sym => if !sym.terminal && sym.name == nonterm then GrammarRhs(canonical) else sym)
            )
          )

  // Removing a duplicate always deletes at least one rule, so an unchanged size means "no merge".
  if cleanGrammar.size == grammar.size then None else Some(cleanGrammar)
|
||||||
|
|
||||||
/** Compacts the grammar and drops every rule that cannot be reached from `start`.
  *
  * First `removeDuplicates` is applied repeatedly until it reports no further change. Then the
  * rules are collected wave by wave: the rules of `start`, then the rules of every nonterminal
  * referenced by the previous wave that has not been collected yet, and so on.
  * Progress (sizes per step and the rules found unreachable) is printed to stdout.
  *
  * @param grammar the rules to clean
  * @param start   name of the start nonterminal; at least one rule for it must survive
  *                compaction, otherwise the assertion below fails
  * @return the reachable rules, in the order they were discovered
  */
def removeDeadRules(grammar: Vector[GrammarRule], start: String): Vector[GrammarRule] =
  // Re-runs removeDuplicates until it returns None, printing the size after each successful pass.
  def dedupToFixpoint(rules: Vector[GrammarRule]): Vector[GrammarRule] =
    removeDuplicates(rules) match
      case Some(smaller) =>
        println(smaller.size)
        dedupToFixpoint(smaller)
      case None => rules

  println(grammar.size)
  val cleanGrammar = dedupToFixpoint(grammar)

  var result: Vector[GrammarRule] = Vector()
  var current = cleanGrammar.filter(_.lhs == start)
  assert(current.size > 0)
  while current.nonEmpty do
    println(s"${current.size} ${result.size}")
    result = result.concat(current)
    // Left-hand sides already collected, and nonterminals mentioned by the wave just added.
    val reached = result.map(_.lhs).toSet
    val referenced = current.flatMap(_.rhs).filterNot(_.terminal).map(_.name).toSet
    current = cleanGrammar.filter(rule => !reached(rule.lhs) && referenced(rule.lhs))
  println(s"Reachability: ${cleanGrammar.diff(result)}")
  println(result.size)
  result
|
||||||
|
|
||||||
def CYK(grammar: Vector[GrammarRule], input: Vector[String], start: String): Option[AmbiguousNode] =
|
def CYK(grammar: Vector[GrammarRule], input: Vector[String], start: String): Option[AmbiguousNode] =
|
||||||
@ -341,13 +359,103 @@ class AmbiguousNode(val content: String, var precedence: Int, var left: Array[Am
|
|||||||
return ret
|
return ret
|
||||||
end AmbiguousNode
|
end AmbiguousNode
|
||||||
|
|
||||||
|
/** Emits the grammar as Rust source: a `Rule` enum, a `NONTERMS` name table and a `GRAMMAR`
  * array whose entries refer to nonterminals by their index in `NONTERMS`.
  *
  * Rules whose first right-hand-side symbol is named `"@"` are not emitted. Every other rule
  * must be either a single terminal (`Rule::Terminal`) or two nonterminals (`Rule::NonTerminal`);
  * anything else fails an assertion. The text is printed to stdout and written to `grammar.rs`
  * in the current working directory.
  *
  * @param grammar the rules to emit
  * @throws Exception if a terminal has no known `Token` variant or a rule refers to a
  *                   nonterminal that has no rule of its own
  */
def printGrammarAsRustLUT(grammar: Vector[GrammarRule]) =
  // Terminal spelling in the grammar -> Rust `Token` variant expression.
  val tokenVariants: Map[String, String] = Map(
    "Name" -> "Name(String::new())",
    "and" -> "And",
    "break" -> "Break",
    "do" -> "Do",
    "else" -> "Else",
    "elseif" -> "Elseif",
    "end" -> "End",
    "false" -> "False",
    "for" -> "For",
    "function" -> "Function",
    "goto" -> "Goto",
    "if" -> "If",
    "in" -> "In",
    "local" -> "Local",
    "nil" -> "Nil",
    "not" -> "Not",
    "or" -> "Or",
    "repeat" -> "Repeat",
    "return" -> "Return",
    "then" -> "Then",
    "true" -> "True",
    "until" -> "Until",
    "while" -> "While",
    "+" -> "Plus",
    "-" -> "Minus",
    "*" -> "Star",
    "/" -> "Slash",
    "%" -> "Percent",
    "^" -> "Caret",
    "#" -> "Hash",
    "&" -> "Ampersand",
    "~" -> "Tilde",
    "|" -> "Pipe",
    "<<" -> "LtLt",
    ">>" -> "GtGt",
    "//" -> "SlashSlash",
    "==" -> "EqualsEquals",
    "~=" -> "TildeEquals",
    "<=" -> "LtEquals",
    ">=" -> "GtEquals",
    "<" -> "Lt",
    ">" -> "Gt",
    "=" -> "Equals",
    "(" -> "RoundOpen",
    ")" -> "RoundClosed",
    "{" -> "CurlyOpen",
    "}" -> "CurlyClosed",
    "[" -> "SquareOpen",
    "]" -> "SquareClosed",
    "::" -> "ColonColon",
    ";" -> "Semicolon",
    ":" -> "Colon",
    "," -> "Comma",
    "." -> "Dot",
    ".." -> "DotDot",
    "..." -> "DotDotDot",
    "Numeral" -> "Numeral(String::new())",
    "LiteralString" -> "StringLiteral(String::new())"
  )

  // Rules marked with "@" as their first symbol have no Rust counterpart and are skipped.
  def isSkipped(rule: GrammarRule): Boolean = rule.rhs(0).name == "@"

  val nonterms = grammar.map(_.lhs).toSet.toArray
  val positionOf: Map[String, Int] = nonterms.zipWithIndex.toMap
  // Fail loudly instead of writing an invalid index into the generated file.
  def index(name: String): Int =
    positionOf.getOrElse(name, throw Exception(s"unknown nonterminal $name"))

  val emitted = grammar.filterNot(isSkipped)

  val builder = StringBuilder()
  builder.append("use crate::tokenizer::Token;\n\n")
  builder.append("enum Rule\n{\n\tTerminal(usize, Token),\n\tNonTerminal(usize, usize, usize)\n}\n\n")

  builder.append(s"const NONTERMS: [&str; ${nonterms.length}] =\n[\n")
  for nonterm <- nonterms do
    builder.append(s"\t\"$nonterm\",\n")
  builder.append("];\n\n")

  // The declared array length must equal the number of entries actually written below,
  // however many "@" rules the grammar contains (none, one, or several).
  builder.append(s"const GRAMMAR: [Rule; ${emitted.size}] =\n[\n")
  for rule <- emitted do
    builder.append("\tRule::")
    if rule.rhs.size == 1 then
      assert(rule.rhs(0).terminal)
      val terminal = rule.rhs(0).name
      val token = tokenVariants.getOrElse(terminal, throw Exception(s"unknown terminal $terminal"))
      builder.append(s"Terminal(${index(rule.lhs)}, Token::$token),\n")
    else
      assert(rule.rhs.size == 2 && !rule.rhs(0).terminal && !rule.rhs(1).terminal)
      builder.append(s"NonTerminal(${index(rule.lhs)}, ${index(rule.rhs(0).name)}, ${index(rule.rhs(1).name)}),\n")
  builder.append("];\n")

  val rustSource = builder.result()
  println(rustSource)
  val fw = FileWriter(new File("grammar.rs"))
  // Close the file even when the write fails.
  try fw.write(rustSource)
  finally fw.close()
|
||||||
|
|
||||||
val chomskyGrammar = grammarToChomsky(baseGrammar).sortBy(x => x.lhs)
|
val chomskyGrammar = grammarToChomsky(baseGrammar).sortBy(x => x.lhs)
|
||||||
val cleanChomskyGrammar = removeDeadRules(chomskyGrammar, "S_0").sortBy(x => x.lhs)
|
val cleanChomskyGrammar = removeDeadRules(chomskyGrammar, "S_0").sortBy(x => x.lhs)
|
||||||
/** Entry point: prints the cleaned Chomsky-form grammar, parses a fixed sample token sequence
  * with CYK, prints the raw and the disambiguated parse, and finally writes the grammar out
  * as a Rust lookup table.
  */
@main def main =
  println(printGrammar(cleanChomskyGrammar))
  // `.get` is deliberate for this smoke run: if CYK returns None the program stops here
  // with a NoSuchElementException.
  val root = CYK(cleanChomskyGrammar, Vector("local", "Name", "=", "Numeral", "+", "Numeral", "+", "(", "Numeral", "*", "Numeral", ")"), "S_0").get
  println(root)
  println("===============\n\n\n")
  println(disambiguate(root))

  printGrammarAsRustLUT(cleanChomskyGrammar)
|
Loading…
x
Reference in New Issue
Block a user