Initial commit

This commit is contained in:
0x4261756D 2023-07-09 03:56:43 +02:00
commit 39449b402b
6 changed files with 415 additions and 0 deletions

32
.gitignore vendored Normal file
View File

@ -0,0 +1,32 @@
# macOS
.DS_Store
# sbt specific
dist/*
target/
lib_managed/
src_managed/
project/boot/
project/plugins/project/
project/local-plugins.sbt
.history
.ensime
.ensime_cache/
.sbt-scripted/
local.sbt
# BSP (Build Server Protocol) connection files
.bsp
# VS Code
.vscode/
# Metals
.bloop/
.metals/
metals.sbt
# IDEA
.idea
.idea_modules
/.worksheet/

8
README.md Normal file
View File

@ -0,0 +1,8 @@
## sbt project compiled with Scala 3
### Usage
This is a normal sbt project. You can compile code with `sbt compile`, run it with `sbt run`, and `sbt console` will start a Scala 3 REPL.
For more information on building Scala 3 projects with sbt, see the
[scala3-example-project](https://github.com/scala/scala3-example-project/blob/main/README.md).

12
build.sbt Normal file
View File

@ -0,0 +1,12 @@
// Scala compiler version used by the build; referenced by `scalaVersion` below.
val scala3Version = "3.3.0"
// Single root project located in the repository's base directory.
lazy val root = project
.in(file("."))
.settings(
name := "grammar_fun",
version := "0.1.0-SNAPSHOT",
scalaVersion := scala3Version,
// MUnit is scoped to the Test configuration only.
libraryDependencies += "org.scalameta" %% "munit" % "0.7.29" % Test
)

1
project/build.properties Normal file
View File

@ -0,0 +1 @@
sbt.version=1.9.1

353
src/main/scala/Main.scala Normal file
View File

@ -0,0 +1,353 @@
import scala.collection.mutable.ArrayBuffer
import scala.collection.immutable.HashSet
import java.io.FileWriter
import java.io.BufferedWriter
import java.io.File
import scala.collection.mutable.Queue
import scala.collection.mutable.HashMap
/** One EBNF production `lhs ::= rhs(0) rhs(1) ...`.
  *
  * @param lhs name of the nonterminal being defined
  * @param rhs symbols of the production in order; an empty vector denotes the empty production
  */
class EbnfRule(var lhs: String, var rhs: Vector[EbnfRhs]):
  /** Renders the rule as `lhs ::= a b c`, or `lhs ::= @` for the empty production.
    *
    * Symbols are joined with exactly one space; the previous fold-based join
    * started from an empty seed and therefore emitted a stray extra space
    * after `::=`.
    */
  override def toString(): String =
    if rhs.nonEmpty then s"$lhs ::= ${rhs.mkString(" ")}" else s"$lhs ::= @"
end EbnfRule
/** Concatenates the string forms of `a` and `b`, separated by a single space. */
def joinRhsStrings(a: EbnfRhs | GrammarRhs | String, b: EbnfRhs | GrammarRhs | String): String =
  Seq(a, b).mkString(" ")
/** One symbol on the right-hand side of an [[EbnfRule]].
  *
  * @param name       symbol text
  * @param terminal   whether the symbol is a terminal (rendered in single quotes)
  * @param optional   whether the symbol may be absent
  * @param repeatable whether the symbol may occur more than once
  */
class EbnfRhs(var name: String, var terminal: Boolean = false, var optional: Boolean = false, var repeatable: Boolean = false):
  /** Renders the symbol followed by its EBNF marker: `*` (optional and
    * repeatable), `?` (optional only), `+` (repeatable only) or nothing.
    */
  override def toString(): String =
    val quote = if terminal then "'" else ""
    val marker = (optional, repeatable) match
      case (true, true)   => "*"
      case (true, false)  => "?"
      case (false, true)  => "+"
      case (false, false) => ""
    s"$quote$name$quote$marker"
end EbnfRhs
/** One context-free production `lhs -> rhs(0) rhs(1) ...`.
  *
  * Two rules are equal when their left-hand sides match and their right-hand
  * sides contain equal symbols in the same order.
  *
  * @param lhs name of the nonterminal being defined
  * @param rhs symbols of the production in order; an empty vector denotes the empty production
  */
class GrammarRule(var lhs: String, var rhs: Vector[GrammarRhs]):
  /** Renders the rule as `lhs -> a b c`, or `lhs -> @` for the empty production.
    *
    * Symbols are joined with exactly one space (the fold-based join used
    * before produced a stray extra space after `->`).
    */
  override def toString(): String =
    if rhs.nonEmpty then s"$lhs -> ${rhs.mkString(" ")}" else s"$lhs -> @"

  override def equals(other: Any): Boolean = other match
    case that: GrammarRule => that.lhs == lhs && that.rhs.sameElements(rhs)
    case _                 => false

  /** Hash consistent with [[equals]], so hash-based operations such as `diff`,
    * `distinct` or `Set` membership treat equal rules as equal. It is derived
    * from the symbols' fields directly, so it does not depend on how the
    * symbol class hashes itself. The fields are mutable: do not mutate a rule
    * while it is stored in a hash-based collection.
    */
  override def hashCode(): Int =
    (lhs, rhs.map(symbol => (symbol.name, symbol.terminal))).hashCode()
end GrammarRule
/** One symbol on the right-hand side of a grammar rule.
  *
  * Two symbols are equal when both `name` and `terminal` match.
  *
  * @param name     symbol text
  * @param terminal whether the symbol is a terminal (rendered in single quotes)
  */
class GrammarRhs(var name: String, var terminal: Boolean = false):
  override def toString(): String = if terminal then s"'$name'" else name

  override def equals(other: Any): Boolean = other match
    case that: GrammarRhs => that.name == name && that.terminal == terminal
    case _                => false

  /** Hash consistent with [[equals]]; required for hash-based operations such
    * as `diff`, `distinct` or `Set` membership. The fields are mutable: do not
    * mutate a symbol while it is stored in a hash-based collection.
    */
  override def hashCode(): Int = (name, terminal).hashCode()
end GrammarRhs
/** Rewrites EBNF rules into plain context-free rules.
  *
  * Rules without optional or repeatable symbols are copied first, in their
  * original order. Every other rule is appended afterwards with each marked
  * symbol `x` replaced by a helper nonterminal, which is defined once:
  *
  *  - `x?` becomes `x_?` with `x_? -> @` and `x_? -> x`
  *  - `x*` becomes `x_*` with `x_* -> @` and `x_* -> x x_*`
  *  - `x+` becomes `x_+` with `x_+ -> x` and `x_+ -> x x_+`
  *
  * A helper is not redefined if some rule with that left-hand side already
  * exists in the output.
  *
  * @param ebnf the EBNF rules to convert
  * @return the equivalent rules without optional/repeatable markers
  */
def ebnfToGrammar(ebnf: Vector[EbnfRule]): Vector[GrammarRule] =
  val optionalSuffix = "_?"
  val starSuffix = "_*"
  val plusSuffix = "_+"

  val (extended, plain) = ebnf.partition(_.rhs.exists(rhs => rhs.optional || rhs.repeatable))
  val grammar: ArrayBuffer[GrammarRule] =
    plain.map(rule => GrammarRule(rule.lhs, rule.rhs.map(rhs => GrammarRhs(rhs.name, rhs.terminal)))).to(ArrayBuffer)

  /** Suffix of the helper nonterminal standing in for `rhs`, if it needs one. */
  def helperSuffix(rhs: EbnfRhs): Option[String] =
    (rhs.optional, rhs.repeatable) match
      case (true, true)   => Some(starSuffix)
      case (true, false)  => Some(optionalSuffix)
      case (false, true)  => Some(plusSuffix)
      case (false, false) => None

  /** Adds both productions of `helper` unless that nonterminal is already defined. */
  def defineHelper(helper: String, first: Vector[GrammarRhs], second: Vector[GrammarRhs]): Unit =
    if !grammar.exists(_.lhs == helper) then
      grammar.addOne(GrammarRule(helper, first))
      grammar.addOne(GrammarRule(helper, second))

  for rule <- extended do
    for rhs <- rule.rhs; suffix <- helperSuffix(rhs) do
      val helper = rhs.name + suffix
      // A fresh symbol instance per use, so no mutable object is shared between rules.
      def symbol = GrammarRhs(rhs.name, rhs.terminal)
      if suffix == starSuffix then
        defineHelper(helper, Vector(), Vector(symbol, GrammarRhs(helper)))
      else if suffix == optionalSuffix then
        defineHelper(helper, Vector(), Vector(symbol))
      else
        defineHelper(helper, Vector(symbol), Vector(symbol, GrammarRhs(helper)))
    // The rewritten rule references the helper that was defined for each symbol;
    // in particular `x+` now maps to `x_+` (it used to reference the undefined `x_*`).
    grammar.addOne(GrammarRule(rule.lhs, rule.rhs.map(rhs =>
      helperSuffix(rhs) match
        case Some(suffix) => GrammarRhs(rhs.name + suffix)
        case None         => GrammarRhs(rhs.name, rhs.terminal)
    )))
  grammar.toVector
/** Converts a context-free grammar into a Chomsky-normal-form style grammar.
  *
  * The steps run in the order START, TERM, BIN, DEL, UNIT:
  *
  *  - START: adds `S_0 -> <lhs of the first rule>` as the new start rule.
  *  - TERM: in rules that mix terminals with other symbols, each terminal `t`
  *    is replaced by the nonterminal `t_non` with the rule `t_non -> 't'`.
  *  - BIN: rules with more than two symbols are split into chains of
  *    two-symbol rules using fresh nonterminals `<lhs>__<counter>`.
  *  - DEL: empty productions are removed; for every two-symbol rule with a
  *    nullable symbol, the variant without that symbol is added.
  *  - UNIT: rules `A -> B` (single nonterminal) are replaced by copies of B's
  *    rules with `A` as left-hand side.
  *
  * Note that the empty production is dropped even when the start symbol is
  * nullable, so the result never derives the empty input.
  *
  * @param grammar the rules to convert; the first rule's left-hand side is the start symbol
  * @return the converted rules, with `S_0` as start symbol
  * @throws IllegalArgumentException if `grammar` is empty
  * @throws IllegalStateException if unit rules cannot be eliminated because
  *         they are cyclic or point at a symbol without productions
  */
def grammarToChomsky(grammar: Vector[GrammarRule]): Vector[GrammarRule] =
  require(grammar.nonEmpty, "grammar must contain at least one rule; its first rule names the start symbol")
  // START
  val term = ArrayBuffer(GrammarRule("S_0", Vector(GrammarRhs(grammar(0).lhs))))
  // TERM
  val nontermSuffix = "_non"
  // Rules with one symbol, or without any terminal, need no rewriting.
  val (termReady, mixed) = grammar.partition(rule => rule.rhs.size == 1 || !rule.rhs.exists(_.terminal))
  term.addAll(termReady)
  for rule <- mixed do
    for rhs <- rule.rhs do
      if rhs.terminal && !term.exists(_.lhs == rhs.name + nontermSuffix) then
        term.addOne(GrammarRule(rhs.name + nontermSuffix, Vector(rhs)))
    term.addOne(GrammarRule(rule.lhs, rule.rhs.map(rhs => if rhs.terminal then GrammarRhs(rhs.name + nontermSuffix) else rhs)))
  // BIN
  var counter = 0
  val (bin, tooLong) = term.partition(_.rhs.size <= 2)
  for rule <- tooLong do
    bin.addOne(GrammarRule(rule.lhs, Vector(rule.rhs(0), GrammarRhs(s"${rule.lhs}__$counter"))))
    for i <- 0 until (rule.rhs.size - 3) do
      bin.addOne(GrammarRule(s"${rule.lhs}__$counter", Vector(rule.rhs(i + 1), GrammarRhs(s"${rule.lhs}__${counter + 1}"))))
      counter += 1
    bin.addOne(GrammarRule(s"${rule.lhs}__$counter", Vector(rule.rhs(rule.rhs.size - 2), rule.rhs(rule.rhs.size - 1))))
    counter += 1
  // DEL
  var nullable = bin.filter(_.rhs.isEmpty).map(_.lhs).toSet
  var changed = true
  while changed do
    val size = nullable.size
    // A terminal never derives the empty string, even if it shares its name with a nullable nonterminal.
    nullable = nullable ++ bin.filter(_.rhs.forall(rhs => !rhs.terminal && nullable.contains(rhs.name))).map(_.lhs)
    changed = size != nullable.size
  val del: ArrayBuffer[GrammarRule] = bin.filter(_.rhs.nonEmpty)
  for item <- nullable do
    for rule <- bin do
      assert(rule.rhs.size <= 2)
      if rule.rhs.size > 1 then
        if rule.rhs(0).name == item then
          del.addOne(GrammarRule(rule.lhs, Vector(rule.rhs(1))))
        if rule.rhs(1).name == item then
          del.addOne(GrammarRule(rule.lhs, Vector(rule.rhs(0))))
  // UNIT
  val (unit, unitRules) = del.partition(rule => rule.rhs.size != 1 || rule.rhs(0).terminal)
  var rest = unitRules
  while rest.nonEmpty do
    // A unit rule A -> B can be expanded once B itself has no pending unit rules.
    val (resolvable, pending) = rest.partition(a => !rest.exists(a.rhs(0).name == _.lhs) && unit.exists(a.rhs(0).name == _.lhs))
    // Without this guard the loop would spin forever when nothing is resolvable.
    if resolvable.isEmpty then
      throw IllegalStateException(s"cannot eliminate unit rules (cyclic or dangling): ${pending.mkString(", ")}")
    for rule <- resolvable do
      val additional = unit.filter(_.lhs == rule.rhs(0).name).map(a => GrammarRule(rule.lhs, a.rhs))
      unit.addAll(additional)
    rest = pending
  unit.toVector
// Symbols that occur in several rules of the base grammar, defined once and shared.
val blockRhs = EbnfRhs("block")
val endRhs = EbnfRhs("end", terminal = true)
val expRhs = EbnfRhs("exp")
val nameRhs = EbnfRhs("Name", terminal = true)
val eqRhs = EbnfRhs("=", terminal = true)
val commaRhs = EbnfRhs(",", terminal = true)
val doRhs = EbnfRhs("do", terminal = true)
val funcRhs = EbnfRhs("function", terminal = true)
val varRhs = EbnfRhs("var")
val prefixexpRhs = EbnfRhs("prefixexp")
val oBrackRhs = EbnfRhs("(", terminal = true)
val cBrackRhs = EbnfRhs(")", terminal = true)
val varargsRhs = EbnfRhs("...", terminal = true)
/** EBNF rules of the base grammar (they follow the Lua syntax summary).
  *
  * The first rule (`chunk`) is the start rule. EBNF groups are expressed with
  * auxiliary nonterminals (`elseifblocks`, `morevars`, `moreexps`, ...), as
  * [[EbnfRhs]] can only mark single symbols as optional or repeatable.
  *
  * Two rules differ from the initial version: `morevars` continues a
  * `varlist` with a full `var` instead of a bare `Name`, and `binop` includes
  * the binary `~` operator, for which the precedence table already has an entry.
  */
val baseEbnf = Vector(
  EbnfRule("chunk", Vector(blockRhs)),
  EbnfRule("block", Vector(EbnfRhs("stat", optional=true, repeatable=true), EbnfRhs("retstat", optional=true))),
  // Statements
  EbnfRule("stat", Vector(EbnfRhs(";", true))),
  EbnfRule("stat", Vector(EbnfRhs("varlist"), eqRhs, EbnfRhs("explist"))),
  EbnfRule("stat", Vector(EbnfRhs("functioncall"))),
  EbnfRule("stat", Vector(EbnfRhs("label"))),
  EbnfRule("stat", Vector(EbnfRhs("break", true))),
  EbnfRule("stat", Vector(EbnfRhs("goto", true), nameRhs)),
  EbnfRule("stat", Vector(doRhs, blockRhs, endRhs)),
  EbnfRule("stat", Vector(EbnfRhs("while", true), expRhs, doRhs, blockRhs, endRhs)),
  EbnfRule("stat", Vector(EbnfRhs("repeat", true), blockRhs, EbnfRhs("until", true), expRhs)),
  EbnfRule("stat", Vector(EbnfRhs("if", true), expRhs, EbnfRhs("then", true), blockRhs, EbnfRhs("elseifblocks", optional=true, repeatable=true), EbnfRhs("elseblock", optional=true), endRhs)),
  EbnfRule("stat", Vector(EbnfRhs("for", true), nameRhs, eqRhs, expRhs, commaRhs, expRhs, EbnfRhs("forthirdarg", optional=true), doRhs, blockRhs, endRhs)),
  EbnfRule("stat", Vector(EbnfRhs("for", true), EbnfRhs("namelist"), EbnfRhs("in", true), EbnfRhs("explist"), doRhs, blockRhs, endRhs)),
  EbnfRule("stat", Vector(funcRhs, EbnfRhs("funcname"), EbnfRhs("funcbody"))),
  EbnfRule("stat", Vector(EbnfRhs("local", true), funcRhs, nameRhs, EbnfRhs("funcbody"))),
  EbnfRule("stat", Vector(EbnfRhs("local", true), EbnfRhs("attnamelist"), EbnfRhs("assign", optional=true))),
  EbnfRule("elseifblocks", Vector(EbnfRhs("elseif", true), expRhs, EbnfRhs("then", true), blockRhs)),
  EbnfRule("elseblock", Vector(EbnfRhs("else", true), blockRhs)),
  EbnfRule("forthirdarg", Vector(commaRhs, expRhs)),
  EbnfRule("assign", Vector(eqRhs, EbnfRhs("explist"))),
  EbnfRule("attnamelist", Vector(nameRhs, EbnfRhs("attrib"), EbnfRhs("moreattribs", optional=true, repeatable=true))),
  EbnfRule("moreattribs", Vector(commaRhs, nameRhs, EbnfRhs("attrib"))),
  EbnfRule("attrib", Vector(EbnfRhs("<", true), nameRhs, EbnfRhs(">", true))),
  EbnfRule("attrib", Vector()),
  EbnfRule("retstat", Vector(EbnfRhs("return", true), EbnfRhs("explist", optional=true), EbnfRhs(";", true, true))),
  EbnfRule("label", Vector(EbnfRhs("::", true), nameRhs, EbnfRhs("::", true))),
  EbnfRule("funcname", Vector(nameRhs, EbnfRhs("funcnamedotexpansion", optional=true, repeatable=true), EbnfRhs("funcnamecolonexpansion", optional=true))),
  EbnfRule("funcnamedotexpansion", Vector(EbnfRhs(".", true), nameRhs)),
  EbnfRule("funcnamecolonexpansion", Vector(EbnfRhs(":", true), nameRhs)),
  // Variables, names and expression lists
  EbnfRule("varlist", Vector(varRhs, EbnfRhs("morevars", optional=true, repeatable=true))),
  // varlist ::= var {',' var}: every further entry is a full `var`, not just a Name.
  EbnfRule("morevars", Vector(commaRhs, varRhs)),
  EbnfRule("var", Vector(nameRhs)),
  EbnfRule("var", Vector(prefixexpRhs, EbnfRhs("[", true), expRhs, EbnfRhs("]", true))),
  EbnfRule("var", Vector(prefixexpRhs, EbnfRhs(".", true), nameRhs)),
  EbnfRule("namelist", Vector(nameRhs, EbnfRhs("morenames", optional=true, repeatable=true))),
  EbnfRule("morenames", Vector(commaRhs, nameRhs)),
  EbnfRule("explist", Vector(expRhs, EbnfRhs("moreexps", optional=true, repeatable=true))),
  EbnfRule("moreexps", Vector(commaRhs, expRhs)),
  // Expressions
  EbnfRule("exp", Vector(EbnfRhs("nil", true))),
  EbnfRule("exp", Vector(EbnfRhs("false", true))),
  EbnfRule("exp", Vector(EbnfRhs("true", true))),
  EbnfRule("exp", Vector(EbnfRhs("Numeral", true))),
  EbnfRule("exp", Vector(EbnfRhs("LiteralString", true))),
  EbnfRule("exp", Vector(varargsRhs)),
  EbnfRule("exp", Vector(EbnfRhs("functiondef"))),
  EbnfRule("exp", Vector(prefixexpRhs)),
  EbnfRule("exp", Vector(EbnfRhs("tableconstructor"))),
  EbnfRule("exp", Vector(expRhs, EbnfRhs("binop"), expRhs)),
  EbnfRule("exp", Vector(EbnfRhs("unop"), expRhs)),
  EbnfRule("prefixexp", Vector(varRhs)),
  EbnfRule("prefixexp", Vector(EbnfRhs("functioncall"))),
  EbnfRule("prefixexp", Vector(oBrackRhs, expRhs, cBrackRhs)),
  // Function calls and definitions
  EbnfRule("functioncall", Vector(prefixexpRhs, EbnfRhs("args"))),
  EbnfRule("functioncall", Vector(prefixexpRhs, EbnfRhs(":", true), nameRhs, EbnfRhs("args"))),
  EbnfRule("args", Vector(oBrackRhs, EbnfRhs("explist", optional=true), cBrackRhs)),
  EbnfRule("args", Vector(EbnfRhs("tableconstructor"))),
  EbnfRule("args", Vector(EbnfRhs("LiteralString", true))),
  EbnfRule("functiondef", Vector(funcRhs, EbnfRhs("funcbody"))),
  EbnfRule("funcbody", Vector(oBrackRhs, EbnfRhs("parlist", optional=true), cBrackRhs, blockRhs, endRhs)),
  EbnfRule("parlist", Vector(EbnfRhs("namelist"), EbnfRhs("parlistvarargs", optional=true))),
  EbnfRule("parlist", Vector(varargsRhs)),
  EbnfRule("parlistvarargs", Vector(commaRhs, varargsRhs)),
  // Table constructors
  EbnfRule("tableconstructor", Vector(EbnfRhs("{", true), EbnfRhs("fieldlist", optional=true), EbnfRhs("}", true))),
  EbnfRule("fieldlist", Vector(EbnfRhs("field"), EbnfRhs("morefields", optional=true, repeatable=true), EbnfRhs("fieldsep", optional=true))),
  EbnfRule("morefields", Vector(EbnfRhs("fieldsep"), EbnfRhs("field"))),
  EbnfRule("field", Vector(EbnfRhs("[", true), expRhs, EbnfRhs("]", true), eqRhs, expRhs)),
  EbnfRule("field", Vector(nameRhs, eqRhs, expRhs)),
  EbnfRule("field", Vector(expRhs)),
  EbnfRule("fieldsep", Vector(commaRhs)),
  EbnfRule("fieldsep", Vector(EbnfRhs(";", true))),
  // Binary operators
  EbnfRule("binop", Vector(EbnfRhs("+", true))),
  EbnfRule("binop", Vector(EbnfRhs("-", true))),
  EbnfRule("binop", Vector(EbnfRhs("*", true))),
  EbnfRule("binop", Vector(EbnfRhs("/", true))),
  EbnfRule("binop", Vector(EbnfRhs("//", true))),
  EbnfRule("binop", Vector(EbnfRhs("^", true))),
  EbnfRule("binop", Vector(EbnfRhs("%", true))),
  EbnfRule("binop", Vector(EbnfRhs("&", true))),
  // Binary `~` (bitwise exclusive or); `~` is also a unary operator below.
  EbnfRule("binop", Vector(EbnfRhs("~", true))),
  EbnfRule("binop", Vector(EbnfRhs("|", true))),
  EbnfRule("binop", Vector(EbnfRhs(">>", true))),
  EbnfRule("binop", Vector(EbnfRhs("<<", true))),
  EbnfRule("binop", Vector(EbnfRhs("..", true))),
  EbnfRule("binop", Vector(EbnfRhs("<", true))),
  EbnfRule("binop", Vector(EbnfRhs("<=", true))),
  EbnfRule("binop", Vector(EbnfRhs(">", true))),
  EbnfRule("binop", Vector(EbnfRhs(">=", true))),
  EbnfRule("binop", Vector(EbnfRhs("==", true))),
  EbnfRule("binop", Vector(EbnfRhs("~=", true))),
  EbnfRule("binop", Vector(EbnfRhs("and", true))),
  EbnfRule("binop", Vector(EbnfRhs("or", true))),
  // Unary operators
  EbnfRule("unop", Vector(EbnfRhs("-", true))),
  EbnfRule("unop", Vector(EbnfRhs("not", true))),
  EbnfRule("unop", Vector(EbnfRhs("#", true))),
  EbnfRule("unop", Vector(EbnfRhs("~", true))),
)
// Result of passing baseEbnf through ebnfToGrammar.
val baseGrammar = ebnfToGrammar(baseEbnf)
/** Renders a grammar as text, one rule per line.
  *
  * Despite its name this function does not print; it returns the text. Rules
  * are separated by `"\n"` with no leading or trailing newline (the earlier
  * fold-based version started the output with an empty line and had a
  * union-typed result instead of `String`).
  *
  * @param grammar the rules to render, in output order
  * @return the rules' string forms joined by newlines; empty for an empty grammar
  */
def printGrammar(grammar: Vector[EbnfRule | GrammarRule]): String = grammar.mkString("\n")
/** Removes duplicate rules and rules that cannot be reached from `start`.
  *
  * A rule is a duplicate when an earlier rule has the same left-hand side and
  * the same right-hand side symbols. A rule is reachable when its left-hand
  * side is `start` or occurs as a nonterminal on the right-hand side of a
  * reachable rule. The reachability pass runs on the de-duplicated rules, so
  * duplicates no longer leak into the result.
  *
  * Progress is written to standard output: the dropped duplicates, the sizes
  * of the work list and the result per iteration, and the unreachable rules.
  *
  * @param grammar the rules to clean
  * @param start   name of the start nonterminal; at least one rule must define it
  * @return the reachable rules without duplicates, grouped in discovery order
  */
def removeDeadRules(grammar: Vector[GrammarRule], start: String): Vector[GrammarRule] =
  val cleanGrammar: ArrayBuffer[GrammarRule] = ArrayBuffer()
  val duplicates: ArrayBuffer[GrammarRule] = ArrayBuffer()
  for rule <- grammar do
    if cleanGrammar.exists(x => x.lhs == rule.lhs && x.rhs.sameElements(rule.rhs)) then duplicates.addOne(rule)
    else cleanGrammar.addOne(rule)
  println(s"Dedup: ${duplicates.toVector}")
  val clean = cleanGrammar.toVector
  var result: Vector[GrammarRule] = Vector()
  // Left-hand sides already collected into `result`.
  var reached = Set.empty[String]
  var current = clean.filter(_.lhs == start)
  assert(current.size > 0)
  while current.nonEmpty do
    println(s"${current.size} ${result.size}")
    result = result.concat(current)
    reached = reached ++ current.map(_.lhs)
    // Nonterminals mentioned by the rules added in this round.
    val referenced = current.flatMap(_.rhs).filterNot(_.terminal).map(_.name).toSet
    current = clean.filter(rule => referenced.contains(rule.lhs) && !reached.contains(rule.lhs))
  println(s"Reachability: ${clean.filterNot(rule => reached.contains(rule.lhs))}")
  result
/** Recognises `input` with the CYK algorithm and builds the ambiguous parse forest.
  *
  * The grammar is expected to consist of rules with a single terminal or with
  * exactly two nonterminals on the right-hand side. Rules of any other shape
  * take no part in the table construction.
  *
  * Table layout: `p(l - 1)(s - 1)(a)` is true when the nonterminal with index
  * `a` derives the `l` tokens starting at the 1-based position `s`.
  * `back(l - 1)(s - 1)(a)` lists every `(split, b, c)` that produced that
  * entry: the first `split` tokens come from nonterminal `b`, the remaining
  * ones from `c`. Cells without an entry stay `null`.
  *
  * Prints "Is valid" or "Is invalid" to standard output.
  *
  * @param grammar the rules to parse with
  * @param input   the token sequence
  * @param start   name of the start nonterminal
  * @return the parse forest rooted at `start`, or `None` if the input is not
  *         derivable, is empty, or `start` has no rule in `grammar`
  */
def CYK(grammar: Vector[GrammarRule], input: Vector[String], start: String): Option[AmbiguousNode] =
  val indices = grammar.map(_.lhs).toSet.toArray
  // Name -> table index, computed once instead of a linear search per lookup.
  val indexOf: Map[String, Int] = indices.zipWithIndex.toMap
  val r = indices.size
  val n = input.size
  val startIndex = indexOf.getOrElse(start, -1)
  if n == 0 || startIndex < 0 then
    // Nothing to look up in the table: empty input or unknown start symbol.
    println("Is invalid")
    None
  else
    val p = Array.ofDim[Boolean](n, n, r)
    val back = Array.ofDim[ArrayBuffer[Tuple3[Int, Int, Int]]](n, n, r)
    for s <- 0 until n do
      // headOption keeps rules with an empty right-hand side from crashing the scan.
      for rule <- grammar if rule.rhs.headOption.exists(head => head.terminal && head.name == input(s)) do
        p(0)(s)(indexOf(rule.lhs)) = true
    // (lhs, first, second) indices of every two-symbol rule; -1 marks an unknown nonterminal.
    val nonTermToNonterms = grammar.filter(_.rhs.size == 2).map(rule =>
      (indexOf(rule.lhs), indexOf.getOrElse(rule.rhs(0).name, -1), indexOf.getOrElse(rule.rhs(1).name, -1)))
    for l <- 2 to n do
      for s <- 1 to n - l + 1 do
        for _p <- 1 to l - 1 do
          for (a, b, c) <- nonTermToNonterms do
            assert(b >= 0)
            assert(c >= 0)
            if p(_p - 1)(s - 1)(b) && p(l - _p - 1)(s + _p - 1)(c) then
              // Both bounds are checked; previously `a < r` was passed as the assertion message.
              assert(a >= 0 && a < r)
              p(l - 1)(s - 1)(a) = true
              val jump = Tuple3(_p, b, c)
              if back(l - 1)(s - 1)(a) == null then back(l - 1)(s - 1)(a) = ArrayBuffer()
              if !back(l - 1)(s - 1)(a).contains(jump) then
                back(l - 1)(s - 1)(a).addOne(jump)
    if p(n - 1)(0)(startIndex) then
      println("Is valid")
      Some(traverseBack(back, indices, input, n, 1, startIndex, true))
    else
      println("Is invalid")
      None
/** Precedence value per binary operator token; a larger value binds tighter. */
val precedences = HashMap[String, Int](
  "or" -> 0,
  "and" -> 2,
  "<" -> 4, ">" -> 4, "<=" -> 4, ">=" -> 4, "~=" -> 4, "==" -> 4,
  "|" -> 6,
  "~" -> 8,
  "&" -> 10,
  "<<" -> 12, ">>" -> 12,
  ".." -> 14,
  "+" -> 16, "-" -> 16,
  "*" -> 18, "/" -> 18, "//" -> 18, "%" -> 18,
  //("#", 10), ("not", 10),
  "^" -> 22
)
/** Rebuilds the parse forest for one table cell from the back pointers.
  *
  * For a span of one token the result is a node for nonterminal `a` with a
  * single leaf child holding the token; both carry the same precedence: 20
  * when the nonterminal is `unop`, otherwise the token's entry in
  * `precedences` (0 if it has none). For longer spans the node has precedence
  * -1 and, for every recorded split, one alternative in `left` and one in
  * `right` at the same position.
  *
  * The number of splits of every inner cell is printed to standard output.
  *
  * @param back    back pointers, indexed as (length - 1)(start - 1)(nonterminal)
  * @param indices nonterminal name per index
  * @param input   the token sequence
  * @param l       length of the span
  * @param s       1-based start position of the span
  * @param a       index of the nonterminal deriving the span
  * @param left    not read by this function
  * @return the forest node for the cell
  */
def traverseBack(back: Array[Array[Array[ArrayBuffer[Tuple3[Int, Int, Int]]]]], indices: Array[String], input: Vector[String], l: Int, s: Int, a: Int, left: Boolean): AmbiguousNode =
  if l == 1 then
    val token = input(s - 1)
    val precedence = indices(a) match
      case "unop" => 20
      case _      => precedences.getOrElse(token, 0)
    val leaf = AmbiguousNode(token, precedence, Array(), Array())
    AmbiguousNode(indices(a), precedence, Array(leaf), Array())
  else
    val splits = back(l - 1)(s - 1)(a)
    println(splits.size)
    AmbiguousNode(
      indices(a),
      -1,
      splits.map(split => traverseBack(back, indices, input, split._1, s, split._2, true)).toArray,
      splits.map(split => traverseBack(back, indices, input, l - split._1, s + split._1, split._3, false)).toArray
    )
/** Collapses a parse forest into a single tree by precedence.
  *
  * A node whose precedence is not -1 must have exactly one left child and no
  * right child; it becomes a node with that child as a childless `left`.
  * Otherwise the left and the right alternatives are disambiguated
  * recursively and, on each side, the first alternative with the highest
  * precedence is kept. The node's precedence is the larger of the two kept
  * precedences, or 0 if it has no alternatives at all.
  *
  * @param root the forest node to resolve
  * @return the resolved tree
  */
def disambiguate(root: AmbiguousNode): Node =
  if root.precedence != -1 then
    assert(root.left.size == 1 && root.right.isEmpty)
    val only = root.left(0)
    Node(root.content, root.precedence, Some(Node(only.content, only.precedence, None, None)), None)
  else
    val bestLeft: Option[Node] =
      Option.when(root.left.nonEmpty)(root.left.map(disambiguate(_)).maxBy(_.precedence))
    val bestRight: Option[Node] =
      Option.when(root.right.nonEmpty)(root.right.map(disambiguate(_)).maxBy(_.precedence))
    val leftPrecedence = bestLeft.fold(0)(_.precedence)
    val precedence = bestRight.fold(leftPrecedence)(node => math.max(leftPrecedence, node.precedence))
    Node(root.content, precedence, bestLeft, bestRight)
/** Node of a disambiguated parse tree.
  *
  * @param content    symbol or token text
  * @param precedence precedence assigned to the node
  * @param left       left child, if any
  * @param right      right child, if any
  */
class Node(val content: String, var precedence: Int, var left: Option[Node], var right: Option[Node]):
  /** Renders the node as a JSON-like object. With both children they appear
    * as `left` and `right`; a lone left child appears as `terminal`; without a
    * left child only `content` and `precedence` are written.
    */
  override def toString(): String =
    val header = s"{\"content\": \"$content\",\n\"precedence\": $precedence"
    (left, right) match
      case (Some(l), Some(r)) => s"$header,\n\"left\": $l,\n\"right\": $r}"
      case (Some(l), None)    => s"$header,\n\"terminal\": $l}"
      case (None, _)          => s"$header}"
end Node
/** Node of an ambiguous parse forest.
  *
  * @param content    symbol or token text
  * @param precedence precedence assigned to the node
  * @param left       alternatives for the left child
  * @param right      alternatives for the right child
  */
class AmbiguousNode(val content: String, var precedence: Int, var left: Array[AmbiguousNode], var right: Array[AmbiguousNode]):
  /** Renders the node as a JSON-like object whose `members` entry holds the
    * left and the right alternatives as two arrays.
    *
    * Elements are joined with `,` and no trailing comma; the trailing commas
    * written before made the arrays invalid JSON.
    */
  override def toString(): String =
    val leftMembers = left.mkString(",")
    val rightMembers = right.mkString(",")
    s"{\"content\": \"$content\",\n\"precedence\": $precedence,\n\"members\": [[$leftMembers],[$rightMembers]]}"
end AmbiguousNode
// Base grammar after grammarToChomsky, ordered by left-hand side.
val chomskyGrammar = grammarToChomsky(baseGrammar).sortBy(_.lhs)
// The same grammar after removeDeadRules with start symbol "S_0", ordered by left-hand side.
val cleanChomskyGrammar = removeDeadRules(chomskyGrammar, "S_0").sortBy(_.lhs)
/** Program entry point: prints the cleaned grammar. */
@main def main: Unit =
  println(printGrammar(cleanChomskyGrammar))
/* println(printGrammar(chomskyGrammar))
println("---")
println(printGrammar(cleanChomskyGrammar))
val root = CYK(cleanChomskyGrammar, Vector("local", "Name", "=", "Numeral", "+", "Numeral", "+", "(", "Numeral", "*", "Numeral", ")"), "S_0").get
println(root)
println("===============\n\n\n")
println(disambiguate(root)) */

View File

@ -0,0 +1,9 @@
// For more information on writing tests, see
// https://scalameta.org/munit/docs/getting-started.html
/** Placeholder suite showing the shape of a munit test. */
class MySuite extends munit.FunSuite {
  test("example test that succeeds") {
    // Expected value first, then the value under test.
    val expected = 42
    val obtained = 42
    assertEquals(obtained, expected)
  }
}